src/gromacs/gmxana/gmx_bar.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include "config.h"
  38
  39 #include <ctype.h>
  40 #include <float.h>
  41 #include <math.h>
  42 #include <stdlib.h>
  43 #include <string.h>
  44
  45 #include "gromacs/legacyheaders/typedefs.h"
  46 #include "gromacs/utility/smalloc.h"
  47 #include "gromacs/utility/futil.h"
  48 #include "gromacs/commandline/pargs.h"
  49 #include "gromacs/legacyheaders/macros.h"
  50 #include "gromacs/fileio/enxio.h"
  51 #include "gromacs/math/units.h"
  52 #include "gromacs/utility/fatalerror.h"
  53 #include "gromacs/fileio/xvgr.h"
  54 #include "gromacs/legacyheaders/viewit.h"
  55 #include "gmx_ana.h"
  56 #include "gromacs/math/utilities.h"
  57 #include "gromacs/utility/cstringutil.h"
  58 #include "gromacs/legacyheaders/names.h"
  59 #include "gromacs/legacyheaders/mdebin.h"
  60
  61
  62 /* Structure for the names of lambda vector components */
  63 typedef struct lambda_components_t
  64 {
  65     char **names;  /* Array of strings with names for the lambda vector
  66                       components */
  67     int    N;      /* The number of components */
  68     int    Nalloc; /* The number of allocated components */
  69 } lambda_components_t;
  70
  71 /* Structure for a lambda vector or a dhdl derivative direction */
  72 typedef struct lambda_vec_t
  73 {
  74     double                    *val;   /* The lambda vector component values. Only valid if
  75                                          dhdl == -1 */
  76     int                        dhdl;  /* The coordinate index for the derivative described by this
  77                                          structure, or -1 */
  78     const lambda_components_t *lc;    /* the associated lambda_components
  79                                          structure */
  80     int                        index; /* The state number (init-lambda-state) of this lambda
  81                                          vector, if known. If not, it is set to -1 */
  82 } lambda_vec_t;
  83
  84 /* the dhdl.xvg data from a simulation */
  85 typedef struct xvg_t
  86 {
  87     const char   *filename;
  88     int           ftp;           /* file type */
  89     int           nset;          /* number of lambdas, including dhdl */
  90     int          *np;            /* number of data points (du or hists) per lambda */
  91     int           np_alloc;      /* number of points (du or hists) allocated */
  92     double        temp;          /* temperature */
  93     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  94     double       *t;             /* the times (of second index for y) */
  95     double      **y;             /* the dU values. y[0] holds the derivative, while
  96                                     further ones contain the energy differences between
  97                                     the native lambda and the 'foreign' lambdas. */
  98     lambda_vec_t  native_lambda; /* the native lambda */
  99
 100     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 101 } xvg_t;
 102
 103
 104 typedef struct hist_t
 105 {
 106     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 107     double          dx[2];                  /* the histogram spacing. The reverse
 108                                                dx is the negative of the forward dx.*/
 109     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 110                                                    point(s) as int */
 111
 112     int             nbin[2];                /* the (forward+reverse) number of bins */
 113     gmx_int64_t     sum;                    /* the total number of counts. Must be
 114                                                    the same for forward + reverse.  */
 115     int             nhist;                  /* number of hist datas (forward or reverse) */
 116
 117     double          start_time, delta_time; /* start time, end time of histogram */
 118 } hist_t;
 119
 120
 121 /* an aggregate of samples for partial free energy calculation */
 122 typedef struct samples_t
 123 {
 124     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 125     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 126     double        temp;           /* the temperature */
 127     gmx_bool      derivative;     /* whether this sample is a derivative */
 128
 129     /* The samples come either as either delta U lists: */
 130     int     ndu;                    /* the number of delta U samples */
 131     double *du;                     /* the delta u's */
 132     double *t;                      /* the times associated with those samples, or: */
 133     double  start_time, delta_time; /*start time and delta time for linear time*/
 134
 135     /* or as histograms: */
 136     hist_t *hist; /* a histogram */
 137
 138     /* allocation data: (not NULL for data 'owned' by this struct) */
 139     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 140     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 141     hist_t         *hist_alloc;          /* allocated hist */
 142
 143     gmx_int64_t     ntot;                /* total number of samples */
 144     const char     *filename;            /* the file name this sample comes from */
 145 } samples_t;
 146
 147 /* a sample range (start to end for du-style data, or boolean
 148     for both du-style data and histograms */
 149 typedef struct sample_range_t
 150 {
 151     int        start, end; /* start and end index for du style data */
 152     gmx_bool   use;        /* whether to use this sample */
 153
 154     samples_t *s;          /* the samples this range belongs to */
 155 } sample_range_t;
 156
 157
 158 /* a collection of samples for a partial free energy calculation
 159     (i.e. the collection of samples from one native lambda to one
 160     foreign lambda) */
 161 typedef struct sample_coll_t
 162 {
 163     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 164                                           in the histogram */
 165     lambda_vec_t   *foreign_lambda;    /* collection */
 166     double          temp;              /* the temperature */
 167
 168     int             nsamples;          /* the number of samples */
 169     samples_t     **s;                 /* the samples themselves */
 170     sample_range_t *r;                 /* the sample ranges */
 171     int             nsamples_alloc;    /* number of allocated samples */
 172
 173     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 174                                               this collection */
 175
 176     struct sample_coll_t *next, *prev; /* next and previous in the list */
 177 } sample_coll_t;
 178
 179 /* all the samples associated with a lambda point */
 180 typedef struct lambda_data_t
 181 {
 182     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 183     double                temp;        /* temperature */
 184
 185     sample_coll_t        *sc;          /* the samples */
 186
 187     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 188
 189     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 190 } lambda_data_t;
 191
 192 /* Top-level data structure of simulation data */
 193 typedef struct sim_data_t
 194 {
 195     lambda_data_t      *lb;      /* a lambda data linked list */
 196     lambda_data_t       lb_head; /* The head element of the linked list */
 197
 198     lambda_components_t lc;      /* the allowed components of the lambda
 199                                     vectors */
 200 } sim_data_t;
 201
 202 /* Top-level data structure with calculated values. */
 203 typedef struct {
 204     sample_coll_t *a, *b;            /* the simulation data */
 205
 206     double         dg;               /* the free energy difference */
 207     double         dg_err;           /* the free energy difference */
 208
 209     double         dg_disc_err;      /* discretization error */
 210     double         dg_histrange_err; /* histogram range error */
 211
 212     double         sa;               /* relative entropy of b in state a */
 213     double         sa_err;           /* error in sa */
 214     double         sb;               /* relative entropy of a in state b */
 215     double         sb_err;           /* error in sb */
 216
 217     double         dg_stddev;        /* expected dg stddev per sample */
 218     double         dg_stddev_err;    /* error in dg_stddev */
 219 } barres_t;
 220
 221
 222 /* Initialize a lambda_components structure */
 223 static void lambda_components_init(lambda_components_t *lc)
 224 {
 225     lc->N      = 0;
 226     lc->Nalloc = 2;
 227     snew(lc->names, lc->Nalloc);
 228 }
 229
 230 /* Add a component to a lambda_components structure */
 231 static void lambda_components_add(lambda_components_t *lc,
 232                                   const char *name, size_t name_length)
 233 {
 234     while (lc->N + 1 > lc->Nalloc)
 235     {
 236         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 237         srenew(lc->names, lc->Nalloc);
 238     }
 239     snew(lc->names[lc->N], name_length+1);
 240     strncpy(lc->names[lc->N], name, name_length);
 241     lc->N++;
 242 }
 243
 244 /* check whether a component with index 'index' matches the given name, or
 245    is also NULL. Returns TRUE if this is the case.
 246    the string name does not need to end */
 247 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 248                                         int                        index,
 249                                         const char                *name,
 250                                         size_t                     name_length)
 251 {
 252     size_t len;
 253     if (index >= lc->N)
 254     {
 255         return FALSE;
 256     }
 257     if (name == NULL && lc->names[index] == NULL)
 258     {
 259         return TRUE;
 260     }
 261     if ((name == NULL) != (lc->names[index] == NULL))
 262     {
 263         return FALSE;
 264     }
 265     len = strlen(lc->names[index]);
 266     if (len != name_length)
 267     {
 268         return FALSE;
 269     }
 270     if (strncmp(lc->names[index], name, name_length) == 0)
 271     {
 272         return TRUE;
 273     }
 274     return FALSE;
 275 }
 276
 277 /* Find the index of a given lambda component name, or -1 if not found */
 278 static int lambda_components_find(const lambda_components_t *lc,
 279                                   const char                *name,
 280                                   size_t                     name_length)
 281 {
 282     int i;
 283
 284     for (i = 0; i < lc->N; i++)
 285     {
 286         if (strncmp(lc->names[i], name, name_length) == 0)
 287         {
 288             return i;
 289         }
 290     }
 291     return -1;
 292 }
 293
 294
 295
 296 /* initialize a lambda vector */
 297 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 298 {
 299     snew(lv->val, lc->N);
 300     lv->index = -1;
 301     lv->dhdl  = -1;
 302     lv->lc    = lc;
 303 }
 304
 305 static void lambda_vec_destroy(lambda_vec_t *lv)
 306 {
 307     sfree(lv->val);
 308 }
 309
 310 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 311 {
 312     int i;
 313
 314     lambda_vec_init(lv, orig->lc);
 315     lv->dhdl  = orig->dhdl;
 316     lv->index = orig->index;
 317     for (i = 0; i < lv->lc->N; i++)
 318     {
 319         lv->val[i] = orig->val[i];
 320     }
 321 }
 322
 323 /* write a lambda vec to a preallocated string */
 324 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 325 {
 326     int    i;
 327     size_t np;
 328
 329     str[0] = 0; /* reset the string */
 330     if (lv->dhdl < 0)
 331     {
 332         if (named)
 333         {
 334             str += sprintf(str, "delta H to ");
 335         }
 336         if (lv->lc->N > 1)
 337         {
 338             str += sprintf(str, "(");
 339         }
 340         for (i = 0; i < lv->lc->N; i++)
 341         {
 342             str += sprintf(str, "%g", lv->val[i]);
 343             if (i < lv->lc->N-1)
 344             {
 345                 str += sprintf(str, ", ");
 346             }
 347         }
 348         if (lv->lc->N > 1)
 349         {
 350             str += sprintf(str, ")");
 351         }
 352     }
 353     else
 354     {
 355         /* this lambda vector describes a derivative */
 356         str += sprintf(str, "dH/dl");
 357         if (strlen(lv->lc->names[lv->dhdl]) > 0)
 358         {
 359             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 360         }
 361     }
 362 }
 363
 364 /* write a shortened version of the lambda vec to a preallocated string */
 365 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 366 {
 367     int    i;
 368     size_t np;
 369
 370     if (lv->index >= 0)
 371     {
 372         sprintf(str, "%6d", lv->index);
 373     }
 374     else
 375     {
 376         if (lv->dhdl < 0)
 377         {
 378             sprintf(str, "%6.3f", lv->val[0]);
 379         }
 380         else
 381         {
 382             sprintf(str, "dH/dl[%d]", lv->dhdl);
 383         }
 384     }
 385 }
 386
 387 /* write an intermediate version of two lambda vecs to a preallocated string */
 388 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 389                                           const lambda_vec_t *b, char *str)
 390 {
 391     int    i;
 392     size_t np;
 393
 394     str[0] = 0;
 395     if ( (a->index >= 0) && (b->index >= 0) )
 396     {
 397         sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
 398     }
 399     else
 400     {
 401         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 402         {
 403             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
 404         }
 405     }
 406 }
 407
 408
 409
 410 /* calculate the difference in lambda vectors: c = a-b.
 411    c must be initialized already, and a and b must describe non-derivative
 412    lambda points */
 413 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
 414                             lambda_vec_t *c)
 415 {
 416     int i;
 417
 418     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 419     {
 420         gmx_fatal(FARGS,
 421                   "Trying to calculate the difference between derivatives instead of lambda points");
 422     }
 423     if ((a->lc != b->lc) || (a->lc != c->lc) )
 424     {
 425         gmx_fatal(FARGS,
 426                   "Trying to calculate the difference lambdas with differing basis set");
 427     }
 428     for (i = 0; i < a->lc->N; i++)
 429     {
 430         c->val[i] = a->val[i] - b->val[i];
 431     }
 432 }
 433
 434 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 435    a and b must describe non-derivative lambda points */
 436 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 437 {
 438     int    i;
 439     double ret = 0.;
 440
 441     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 442     {
 443         gmx_fatal(FARGS,
 444                   "Trying to calculate the difference between derivatives instead of lambda points");
 445     }
 446     if (a->lc != b->lc)
 447     {
 448         gmx_fatal(FARGS,
 449                   "Trying to calculate the difference lambdas with differing basis set");
 450     }
 451     for (i = 0; i < a->lc->N; i++)
 452     {
 453         double df = a->val[i] - b->val[i];
 454         ret += df*df;
 455     }
 456     return sqrt(ret);
 457 }
 458
 459
 460 /* check whether two lambda vectors are the same */
 461 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 462 {
 463     int i;
 464
 465     if (a->lc != b->lc)
 466     {
 467         return FALSE;
 468     }
 469     if (a->dhdl < 0)
 470     {
 471         for (i = 0; i < a->lc->N; i++)
 472         {
 473             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 474             {
 475                 return FALSE;
 476             }
 477         }
 478         return TRUE;
 479     }
 480     else
 481     {
 482         /* they're derivatives, so we check whether the indices match */
 483         return (a->dhdl == b->dhdl);
 484     }
 485 }
 486
 487 /* Compare the sort order of two foreign lambda vectors
 488
 489     returns 1 if a is 'bigger' than b,
 490     returns 0 if they're the same,
 491     returns -1 if a is 'smaller' than b.*/
 492 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
 493                                        const lambda_vec_t *b)
 494 {
 495     int      i;
 496     double   norm_a    = 0, norm_b = 0;
 497     gmx_bool different = FALSE;
 498
 499     if (a->lc != b->lc)
 500     {
 501         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 502     }
 503     /* if either one has an index we sort based on that */
 504     if ((a->index >= 0) || (b->index >= 0))
 505     {
 506         if (a->index == b->index)
 507         {
 508             return 0;
 509         }
 510         return (a->index > b->index) ? 1 : -1;
 511     }
 512     if (a->dhdl >= 0 || b->dhdl >= 0)
 513     {
 514         /* lambda vectors that are derivatives always sort higher than those
 515            without derivatives */
 516         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 517         {
 518             return (a->dhdl >= 0) ? 1 : -1;
 519         }
 520         return a->dhdl > b->dhdl;
 521     }
 522
 523     /* neither has an index, so we can only sort on the lambda components,
 524        which is only valid if there is one component */
 525     for (i = 0; i < a->lc->N; i++)
 526     {
 527         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 528         {
 529             different = TRUE;
 530         }
 531         norm_a += a->val[i]*a->val[i];
 532         norm_b += b->val[i]*b->val[i];
 533     }
 534     if (!different)
 535     {
 536         return 0;
 537     }
 538     return norm_a > norm_b;
 539 }
 540
 541 /* Compare the sort order of two native lambda vectors
 542
 543     returns 1 if a is 'bigger' than b,
 544     returns 0 if they're the same,
 545     returns -1 if a is 'smaller' than b.*/
 546 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
 547                                       const lambda_vec_t *b)
 548 {
 549     int i;
 550
 551     if (a->lc != b->lc)
 552     {
 553         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 554     }
 555     /* if either one has an index we sort based on that */
 556     if ((a->index >= 0) || (b->index >= 0))
 557     {
 558         if (a->index == b->index)
 559         {
 560             return 0;
 561         }
 562         return (a->index > b->index) ? 1 : -1;
 563     }
 564     /* neither has an index, so we can only sort on the lambda components,
 565        which is only valid if there is one component */
 566     if (a->lc->N > 1)
 567     {
 568         gmx_fatal(FARGS,
 569                   "Can't compare lambdas with no index and > 1 component");
 570     }
 571     if (a->dhdl >= 0 || b->dhdl >= 0)
 572     {
 573         gmx_fatal(FARGS,
 574                   "Can't compare native lambdas that are derivatives");
 575     }
 576     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 577     {
 578         return 0;
 579     }
 580     return a->val[0] > b->val[0] ? 1 : -1;
 581 }
 582
 583
 584
 585
 586 static void hist_init(hist_t *h, int nhist, int *nbin)
 587 {
 588     int i;
 589     if (nhist > 2)
 590     {
 591         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 592     }
 593     for (i = 0; i < nhist; i++)
 594     {
 595         snew(h->bin[i], nbin[i]);
 596         h->x0[i]      = 0;
 597         h->nbin[i]    = nbin[i];
 598         h->start_time = h->delta_time = 0;
 599         h->dx[i]      = 0;
 600     }
 601     h->sum   = 0;
 602     h->nhist = nhist;
 603 }
 604
 605 static void hist_destroy(hist_t *h)
 606 {
 607     sfree(h->bin);
 608 }
 609
 610
 611 static void xvg_init(xvg_t *ba)
 612 {
 613     ba->filename = NULL;
 614     ba->nset     = 0;
 615     ba->np_alloc = 0;
 616     ba->np       = NULL;
 617     ba->y        = NULL;
 618 }
 619
 620 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 621                          lambda_vec_t *foreign_lambda, double temp,
 622                          gmx_bool derivative, const char *filename)
 623 {
 624     s->native_lambda  = native_lambda;
 625     s->foreign_lambda = foreign_lambda;
 626     s->temp           = temp;
 627     s->derivative     = derivative;
 628
 629     s->ndu        = 0;
 630     s->du         = NULL;
 631     s->t          = NULL;
 632     s->start_time = s->delta_time = 0;
 633     s->hist       = NULL;
 634     s->du_alloc   = NULL;
 635     s->t_alloc    = NULL;
 636     s->hist_alloc = NULL;
 637     s->ndu_alloc  = 0;
 638     s->nt_alloc   = 0;
 639
 640     s->ntot     = 0;
 641     s->filename = filename;
 642 }
 643
 644 static void sample_range_init(sample_range_t *r, samples_t *s)
 645 {
 646     r->start = 0;
 647     r->end   = s->ndu;
 648     r->use   = TRUE;
 649     r->s     = NULL;
 650 }
 651
 652 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 653                              lambda_vec_t *foreign_lambda, double temp)
 654 {
 655     sc->native_lambda  = native_lambda;
 656     sc->foreign_lambda = foreign_lambda;
 657     sc->temp           = temp;
 658
 659     sc->nsamples       = 0;
 660     sc->s              = NULL;
 661     sc->r              = NULL;
 662     sc->nsamples_alloc = 0;
 663
 664     sc->ntot = 0;
 665     sc->next = sc->prev = NULL;
 666 }
 667
 668 static void sample_coll_destroy(sample_coll_t *sc)
 669 {
 670     /* don't free the samples themselves */
 671     sfree(sc->r);
 672     sfree(sc->s);
 673 }
 674
 675
 676 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 677                              double temp)
 678 {
 679     l->lambda = native_lambda;
 680     l->temp   = temp;
 681
 682     l->next = NULL;
 683     l->prev = NULL;
 684
 685     l->sc = &(l->sc_head);
 686
 687     sample_coll_init(l->sc, native_lambda, NULL, 0.);
 688     l->sc->next = l->sc;
 689     l->sc->prev = l->sc;
 690 }
 691
 692 static void barres_init(barres_t *br)
 693 {
 694     br->dg            = 0;
 695     br->dg_err        = 0;
 696     br->sa            = 0;
 697     br->sa_err        = 0;
 698     br->sb            = 0;
 699     br->sb_err        = 0;
 700     br->dg_stddev     = 0;
 701     br->dg_stddev_err = 0;
 702
 703     br->a = NULL;
 704     br->b = NULL;
 705 }
 706
 707
 708 /* calculate the total number of samples in a sample collection */
 709 static void sample_coll_calc_ntot(sample_coll_t *sc)
 710 {
 711     int i;
 712
 713     sc->ntot = 0;
 714     for (i = 0; i < sc->nsamples; i++)
 715     {
 716         if (sc->r[i].use)
 717         {
 718             if (sc->s[i]->hist)
 719             {
 720                 sc->ntot += sc->s[i]->ntot;
 721             }
 722             else
 723             {
 724                 sc->ntot += sc->r[i].end - sc->r[i].start;
 725             }
 726         }
 727     }
 728 }
 729
 730
 731 /* find the barsamples_t associated with a lambda that corresponds to
 732    a specific foreign lambda */
 733 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 734                                                    lambda_vec_t  *foreign_lambda)
 735 {
 736     sample_coll_t *sc = l->sc->next;
 737
 738     while (sc != l->sc)
 739     {
 740         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 741         {
 742             return sc;
 743         }
 744         sc = sc->next;
 745     }
 746
 747     return NULL;
 748 }
 749
 750 /* insert li into an ordered list of lambda_colls */
 751 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 752 {
 753     sample_coll_t *scn = l->sc->next;
 754     while ( (scn != l->sc) )
 755     {
 756         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 757         {
 758             break;
 759         }
 760         scn = scn->next;
 761     }
 762     /* now insert it before the found scn */
 763     sc->next        = scn;
 764     sc->prev        = scn->prev;
 765     scn->prev->next = sc;
 766     scn->prev       = sc;
 767 }
 768
 769 /* insert li into an ordered list of lambdas */
 770 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 771 {
 772     lambda_data_t *lc = head->next;
 773     while (lc != head)
 774     {
 775         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 776         {
 777             break;
 778         }
 779         lc = lc->next;
 780     }
 781     /* now insert ourselves before the found lc */
 782     li->next       = lc;
 783     li->prev       = lc->prev;
 784     lc->prev->next = li;
 785     lc->prev       = li;
 786 }
 787
 788 /* insert a sample and a sample_range into a sample_coll. The
 789     samples are stored as a pointer, the range is copied. */
 790 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 791                                       sample_range_t *r)
 792 {
 793     /* first check if it belongs here */
 794     if (sc->temp != s->temp)
 795     {
 796         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 797                   s->filename, sc->next->s[0]->filename);
 798     }
 799     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 800     {
 801         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 802                   s->filename, sc->next->s[0]->filename);
 803     }
 804     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 805     {
 806         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 807                   s->filename, sc->next->s[0]->filename);
 808     }
 809
 810     /* check if there's room */
 811     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 812     {
 813         sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
 814         srenew(sc->s, sc->nsamples_alloc);
 815         srenew(sc->r, sc->nsamples_alloc);
 816     }
 817     sc->s[sc->nsamples] = s;
 818     sc->r[sc->nsamples] = *r;
 819     sc->nsamples++;
 820
 821     sample_coll_calc_ntot(sc);
 822 }
 823
 824 /* insert a sample into a lambda_list, creating the right sample_coll if
 825    neccesary */
 826 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 827 {
 828     gmx_bool       found = FALSE;
 829     sample_coll_t *sc;
 830     sample_range_t r;
 831
 832     lambda_data_t *l = head->next;
 833
 834     /* first search for the right lambda_data_t */
 835     while (l != head)
 836     {
 837         if (lambda_vec_same(l->lambda, s->native_lambda) )
 838         {
 839             found = TRUE;
 840             break;
 841         }
 842         l = l->next;
 843     }
 844
 845     if (!found)
 846     {
 847         snew(l, 1);                                     /* allocate a new one */
 848         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 849         lambda_data_insert_lambda(head, l);             /* add it to the list */
 850     }
 851
 852     /* now look for a sample collection */
 853     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 854     if (!sc)
 855     {
 856         snew(sc, 1); /* allocate a new one */
 857         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 858         lambda_data_insert_sample_coll(l, sc);
 859     }
 860
 861     /* now insert the samples into the sample coll */
 862     sample_range_init(&r, s);
 863     sample_coll_insert_sample(sc, s, &r);
 864 }
 865
 866
 867 /* make a histogram out of a sample collection */
 868 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 869                                   int *nbin_alloc, int *nbin,
 870                                   double *dx, double *xmin, int nbin_default)
 871 {
 872     int      i, j, k;
 873     gmx_bool dx_set   = FALSE;
 874     gmx_bool xmin_set = FALSE;
 875
 876     gmx_bool xmax_set      = FALSE;
 877     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 878                                        limits of a histogram */
 879     double   xmax          = -1;
 880
 881     /* first determine dx and xmin; try the histograms */
 882     for (i = 0; i < sc->nsamples; i++)
 883     {
 884         if (sc->s[i]->hist)
 885         {
 886             hist_t *hist = sc->s[i]->hist;
 887             for (k = 0; k < hist->nhist; k++)
 888             {
 889                 double hdx      = hist->dx[k];
 890                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 891
 892                 /* we use the biggest dx*/
 893                 if ( (!dx_set) || hist->dx[0] > *dx)
 894                 {
 895                     dx_set = TRUE;
 896                     *dx    = hist->dx[0];
 897                 }
 898                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 899                 {
 900                     xmin_set = TRUE;
 901                     *xmin    = (hist->x0[k]*hdx);
 902                 }
 903
 904                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 905                 {
 906                     xmax_set = TRUE;
 907                     xmax     = xmax_now;
 908                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 909                     {
 910                         xmax_set_hard = TRUE;
 911                     }
 912                 }
 913                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 914                 {
 915                     xmax_set_hard = TRUE;
 916                     xmax          = xmax_now;
 917                 }
 918             }
 919         }
 920     }
 921     /* and the delta us */
 922     for (i = 0; i < sc->nsamples; i++)
 923     {
 924         if (sc->s[i]->ndu > 0)
 925         {
 926             /* determine min and max */
 927             int    starti  = sc->r[i].start;
 928             int    endi    = sc->r[i].end;
 929             double du_xmin = sc->s[i]->du[starti];
 930             double du_xmax = sc->s[i]->du[starti];
 931             for (j = starti+1; j < endi; j++)
 932             {
 933                 if (sc->s[i]->du[j] < du_xmin)
 934                 {
 935                     du_xmin = sc->s[i]->du[j];
 936                 }
 937                 if (sc->s[i]->du[j] > du_xmax)
 938                 {
 939                     du_xmax = sc->s[i]->du[j];
 940                 }
 941             }
 942
 943             /* and now change the limits */
 944             if ( (!xmin_set) || (du_xmin < *xmin) )
 945             {
 946                 xmin_set = TRUE;
 947                 *xmin    = du_xmin;
 948             }
 949             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 950             {
 951                 xmax_set = TRUE;
 952                 xmax     = du_xmax;
 953             }
 954         }
 955     }
 956
 957     if (!xmax_set || !xmin_set)
 958     {
 959         *nbin = 0;
 960         return;
 961     }
 962
 963
 964     if (!dx_set)
 965     {
 966         *nbin = nbin_default;
 967         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 968                                                be 0, and we count from 0 */
 969     }
 970     else
 971     {
 972         *nbin = (xmax-(*xmin))/(*dx);
 973     }
 974
 975     if (*nbin > *nbin_alloc)
 976     {
 977         *nbin_alloc = *nbin;
 978         srenew(*bin, *nbin_alloc);
 979     }
 980
 981     /* reset the histogram */
 982     for (i = 0; i < (*nbin); i++)
 983     {
 984         (*bin)[i] = 0;
 985     }
 986
 987     /* now add the actual data */
 988     for (i = 0; i < sc->nsamples; i++)
 989     {
 990         if (sc->s[i]->hist)
 991         {
 992             hist_t *hist = sc->s[i]->hist;
 993             for (k = 0; k < hist->nhist; k++)
 994             {
 995                 double hdx       = hist->dx[k];
 996                 double xmin_hist = hist->x0[k]*hdx;
 997                 for (j = 0; j < hist->nbin[k]; j++)
 998                 {
 999                     /* calculate the bin corresponding to the middle of the
1000                        original bin */
1001                     double x     = hdx*(j+0.5) + xmin_hist;
1002                     int    binnr = (int)((x-(*xmin))/(*dx));
1003
1004                     if (binnr >= *nbin || binnr < 0)
1005                     {
1006                         binnr = (*nbin)-1;
1007                     }
1008
1009                     (*bin)[binnr] += hist->bin[k][j];
1010                 }
1011             }
1012         }
1013         else
1014         {
1015             int starti = sc->r[i].start;
1016             int endi   = sc->r[i].end;
1017             for (j = starti; j < endi; j++)
1018             {
1019                 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1020                 if (binnr >= *nbin || binnr < 0)
1021                 {
1022                     binnr = (*nbin)-1;
1023                 }
1024
1025                 (*bin)[binnr]++;
1026             }
1027         }
1028     }
1029 }
1030
1031 /* write a collection of histograms to a file */
1032 void sim_data_histogram(sim_data_t *sd, const char *filename,
1033                         int nbin_default, const output_env_t oenv)
1034 {
1035     char           label_x[STRLEN];
1036     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1037     const char    *title   = "N(\\DeltaH)";
1038     const char    *label_y = "Samples";
1039     FILE          *fp;
1040     lambda_data_t *bl;
1041     int            nsets     = 0;
1042     char         **setnames  = NULL;
1043     gmx_bool       first_set = FALSE;
1044     /* histogram data: */
1045     int           *hist       = NULL;
1046     int            nbin       = 0;
1047     int            nbin_alloc = 0;
1048     double         dx         = 0;
1049     double         min        = 0;
1050     int            i;
1051     lambda_data_t *bl_head = sd->lb;
1052
1053     printf("\nWriting histogram to %s\n", filename);
1054     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1055
1056     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1057
1058     /* first get all the set names */
1059     bl = bl_head->next;
1060     /* iterate over all lambdas */
1061     while (bl != bl_head)
1062     {
1063         sample_coll_t *sc = bl->sc->next;
1064
1065         /* iterate over all samples */
1066         while (sc != bl->sc)
1067         {
1068             char buf[STRLEN], buf2[STRLEN];
1069
1070             nsets++;
1071             srenew(setnames, nsets);
1072             snew(setnames[nsets-1], STRLEN);
1073             if (sc->foreign_lambda->dhdl < 0)
1074             {
1075                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1076                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1077                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1078                         deltag, lambda, buf2, lambda, buf);
1079             }
1080             else
1081             {
1082                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1083                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1084                         dhdl, lambda, buf);
1085             }
1086             sc = sc->next;
1087         }
1088
1089         bl = bl->next;
1090     }
1091     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1092
1093
1094     /* now make the histograms */
1095     bl = bl_head->next;
1096     /* iterate over all lambdas */
1097     while (bl != bl_head)
1098     {
1099         sample_coll_t *sc = bl->sc->next;
1100
1101         /* iterate over all samples */
1102         while (sc != bl->sc)
1103         {
1104             if (!first_set)
1105             {
1106                 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1107             }
1108
1109             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1110                                   nbin_default);
1111
1112             for (i = 0; i < nbin; i++)
1113             {
1114                 double xmin = i*dx + min;
1115                 double xmax = (i+1)*dx + min;
1116
1117                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1118             }
1119
1120             first_set = FALSE;
1121             sc        = sc->next;
1122         }
1123
1124         bl = bl->next;
1125     }
1126
1127     if (hist)
1128     {
1129         sfree(hist);
1130     }
1131
1132     xvgrclose(fp);
1133 }
1134
1135 static int
1136 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1137 {
1138     int n = 0;
1139
1140     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1141     if (lambda->index >= 0)
1142     {
1143         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1144     }
1145     if (lambda->dhdl >= 0)
1146     {
1147         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1148     }
1149     else
1150     {
1151         int i;
1152         for (i = 0; i < lambda->lc->N; i++)
1153         {
1154             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1155         }
1156     }
1157     return n;
1158 }
1159
1160 /* create a collection (array) of barres_t object given a ordered linked list
1161    of barlamda_t sample collections */
1162 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1163                                     gmx_bool use_dhdl)
1164 {
1165     lambda_data_t *bl;
1166     int            nlambda = 0;
1167     barres_t      *res;
1168     int            i;
1169     gmx_bool       dhdl    = FALSE;
1170     gmx_bool       first   = TRUE;
1171     lambda_data_t *bl_head = sd->lb;
1172
1173     /* first count the lambdas */
1174     bl = bl_head->next;
1175     while (bl != bl_head)
1176     {
1177         nlambda++;
1178         bl = bl->next;
1179     }
1180     snew(res, nlambda-1);
1181
1182     /* next put the right samples in the res */
1183     *nres = 0;
1184     bl    = bl_head->next->next; /* we start with the second one. */
1185     while (bl != bl_head)
1186     {
1187         sample_coll_t *sc, *scprev;
1188         barres_t      *br = &(res[*nres]);
1189         /* there is always a previous one. we search for that as a foreign
1190            lambda: */
1191         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1192         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1193
1194         barres_init(br);
1195
1196         if (use_dhdl)
1197         {
1198             /* we use dhdl */
1199
1200             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1201             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1202
1203             if (first)
1204             {
1205                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1206                 dhdl = TRUE;
1207             }
1208             if (!dhdl)
1209             {
1210                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1211             }
1212         }
1213         else if (!scprev && !sc)
1214         {
1215             char descX[STRLEN], descY[STRLEN];
1216             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1217             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1218
1219             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1220         }
1221
1222         /* normal delta H */
1223         if (!scprev)
1224         {
1225             char descX[STRLEN], descY[STRLEN];
1226             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1227             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1228             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1229         }
1230         if (!sc)
1231         {
1232             char descX[STRLEN], descY[STRLEN];
1233             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1234             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1235             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1236         }
1237         br->a = scprev;
1238         br->b = sc;
1239
1240         first = FALSE;
1241         (*nres)++;
1242         bl = bl->next;
1243     }
1244     return res;
1245 }
1246
1247 /* estimate the maximum discretization error */
1248 static double barres_list_max_disc_err(barres_t *res, int nres)
1249 {
1250     int    i, j;
1251     double disc_err = 0.;
1252     double delta_lambda;
1253
1254     for (i = 0; i < nres; i++)
1255     {
1256         barres_t *br = &(res[i]);
1257
1258         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1259                                            br->a->native_lambda);
1260
1261         for (j = 0; j < br->a->nsamples; j++)
1262         {
1263             if (br->a->s[j]->hist)
1264             {
1265                 double Wfac = 1.;
1266                 if (br->a->s[j]->derivative)
1267                 {
1268                     Wfac =  delta_lambda;
1269                 }
1270
1271                 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1272             }
1273         }
1274         for (j = 0; j < br->b->nsamples; j++)
1275         {
1276             if (br->b->s[j]->hist)
1277             {
1278                 double Wfac = 1.;
1279                 if (br->b->s[j]->derivative)
1280                 {
1281                     Wfac =  delta_lambda;
1282                 }
1283                 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1284             }
1285         }
1286     }
1287     return disc_err;
1288 }
1289
1290
1291 /* impose start and end times on a sample collection, updating sample_ranges */
1292 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1293                                      double end_t)
1294 {
1295     int i;
1296     for (i = 0; i < sc->nsamples; i++)
1297     {
1298         samples_t      *s = sc->s[i];
1299         sample_range_t *r = &(sc->r[i]);
1300         if (s->hist)
1301         {
1302             double end_time = s->hist->delta_time*s->hist->sum +
1303                 s->hist->start_time;
1304             if (s->hist->start_time < begin_t || end_time > end_t)
1305             {
1306                 r->use = FALSE;
1307             }
1308         }
1309         else
1310         {
1311             if (!s->t)
1312             {
1313                 double end_time;
1314                 if (s->start_time < begin_t)
1315                 {
1316                     r->start = (int)((begin_t - s->start_time)/s->delta_time);
1317                 }
1318                 end_time = s->delta_time*s->ndu + s->start_time;
1319                 if (end_time > end_t)
1320                 {
1321                     r->end = (int)((end_t - s->start_time)/s->delta_time);
1322                 }
1323             }
1324             else
1325             {
1326                 int j;
1327                 for (j = 0; j < s->ndu; j++)
1328                 {
1329                     if (s->t[j] < begin_t)
1330                     {
1331                         r->start = j;
1332                     }
1333
1334                     if (s->t[j] >= end_t)
1335                     {
1336                         r->end = j;
1337                         break;
1338                     }
1339                 }
1340             }
1341             if (r->start > r->end)
1342             {
1343                 r->use = FALSE;
1344             }
1345         }
1346     }
1347     sample_coll_calc_ntot(sc);
1348 }
1349
1350 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1351 {
1352     double         first_t, last_t;
1353     double         begin_t, end_t;
1354     lambda_data_t *lc;
1355     lambda_data_t *head = sd->lb;
1356     int            j;
1357
1358     if (begin <= 0 && end < 0)
1359     {
1360         return;
1361     }
1362
1363     /* first determine the global start and end times */
1364     first_t = -1;
1365     last_t  = -1;
1366     lc      = head->next;
1367     while (lc != head)
1368     {
1369         sample_coll_t *sc = lc->sc->next;
1370         while (sc != lc->sc)
1371         {
1372             for (j = 0; j < sc->nsamples; j++)
1373             {
1374                 double start_t, end_t;
1375
1376                 start_t = sc->s[j]->start_time;
1377                 end_t   =   sc->s[j]->start_time;
1378                 if (sc->s[j]->hist)
1379                 {
1380                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1381                 }
1382                 else
1383                 {
1384                     if (sc->s[j]->t)
1385                     {
1386                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1387                     }
1388                     else
1389                     {
1390                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1391                     }
1392                 }
1393
1394                 if (start_t < first_t || first_t < 0)
1395                 {
1396                     first_t = start_t;
1397                 }
1398                 if (end_t > last_t)
1399                 {
1400                     last_t = end_t;
1401                 }
1402             }
1403             sc = sc->next;
1404         }
1405         lc = lc->next;
1406     }
1407
1408     /* calculate the actual times */
1409     if (begin > 0)
1410     {
1411         begin_t = begin;
1412     }
1413     else
1414     {
1415         begin_t = first_t;
1416     }
1417
1418     if (end > 0)
1419     {
1420         end_t = end;
1421     }
1422     else
1423     {
1424         end_t = last_t;
1425     }
1426     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1427
1428     if (begin_t > end_t)
1429     {
1430         return;
1431     }
1432     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1433
1434     /* then impose them */
1435     lc = head->next;
1436     while (lc != head)
1437     {
1438         sample_coll_t *sc = lc->sc->next;
1439         while (sc != lc->sc)
1440         {
1441             sample_coll_impose_times(sc, begin_t, end_t);
1442             sc = sc->next;
1443         }
1444         lc = lc->next;
1445     }
1446 }
1447
1448
1449 /* create subsample i out of ni from an existing sample_coll */
1450 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1451                                              sample_coll_t *sc_orig,
1452                                              int i, int ni)
1453 {
1454     int             j;
1455     int             hist_start, hist_end;
1456
1457     gmx_int64_t     ntot_start;
1458     gmx_int64_t     ntot_end;
1459     gmx_int64_t     ntot_so_far;
1460
1461     *sc = *sc_orig; /* just copy all fields */
1462
1463     /* allocate proprietary memory */
1464     snew(sc->s, sc_orig->nsamples);
1465     snew(sc->r, sc_orig->nsamples);
1466
1467     /* copy the samples */
1468     for (j = 0; j < sc_orig->nsamples; j++)
1469     {
1470         sc->s[j] = sc_orig->s[j];
1471         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1472     }
1473
1474     /* now fix start and end fields */
1475     /* the casts avoid possible overflows */
1476     ntot_start  = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1477     ntot_end    = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1478     ntot_so_far = 0;
1479     for (j = 0; j < sc->nsamples; j++)
1480     {
1481         gmx_int64_t ntot_add;
1482         gmx_int64_t new_start, new_end;
1483
1484         if (sc->r[j].use)
1485         {
1486             if (sc->s[j]->hist)
1487             {
1488                 ntot_add = sc->s[j]->hist->sum;
1489             }
1490             else
1491             {
1492                 ntot_add = sc->r[j].end - sc->r[j].start;
1493             }
1494         }
1495         else
1496         {
1497             ntot_add = 0;
1498         }
1499
1500         if (!sc->s[j]->hist)
1501         {
1502             if (ntot_so_far < ntot_start)
1503             {
1504                 /* adjust starting point */
1505                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1506             }
1507             else
1508             {
1509                 new_start = sc->r[j].start;
1510             }
1511             /* adjust end point */
1512             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1513             if (new_end > sc->r[j].end)
1514             {
1515                 new_end = sc->r[j].end;
1516             }
1517
1518             /* check if we're in range at all */
1519             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1520             {
1521                 new_start = 0;
1522                 new_end   = 0;
1523             }
1524             /* and write the new range */
1525             sc->r[j].start = (int)new_start;
1526             sc->r[j].end   = (int)new_end;
1527         }
1528         else
1529         {
1530             if (sc->r[j].use)
1531             {
1532                 double overlap;
1533                 double ntot_start_norm, ntot_end_norm;
1534                 /* calculate the amount of overlap of the
1535                    desired range (ntot_start -- ntot_end) onto
1536                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1537
1538                 /* first calculate normalized bounds
1539                    (where 0 is the start of the hist range, and 1 the end) */
1540                 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1541                 ntot_end_norm   = (ntot_end-ntot_so_far)/(double)ntot_add;
1542
1543                 /* now fix the boundaries */
1544                 ntot_start_norm = min(1, max(0., ntot_start_norm));
1545                 ntot_end_norm   = max(0, min(1., ntot_end_norm));
1546
1547                 /* and calculate the overlap */
1548                 overlap = ntot_end_norm - ntot_start_norm;
1549
1550                 if (overlap > 0.95) /* we allow for 5% slack */
1551                 {
1552                     sc->r[j].use = TRUE;
1553                 }
1554                 else if (overlap < 0.05)
1555                 {
1556                     sc->r[j].use = FALSE;
1557                 }
1558                 else
1559                 {
1560                     return FALSE;
1561                 }
1562             }
1563         }
1564         ntot_so_far += ntot_add;
1565     }
1566     sample_coll_calc_ntot(sc);
1567
1568     return TRUE;
1569 }
1570
1571 /* calculate minimum and maximum work values in sample collection */
1572 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1573                                 double *Wmin, double *Wmax)
1574 {
1575     int i, j;
1576
1577     *Wmin = FLT_MAX;
1578     *Wmax = -FLT_MAX;
1579
1580     for (i = 0; i < sc->nsamples; i++)
1581     {
1582         samples_t      *s = sc->s[i];
1583         sample_range_t *r = &(sc->r[i]);
1584         if (r->use)
1585         {
1586             if (!s->hist)
1587             {
1588                 for (j = r->start; j < r->end; j++)
1589                 {
1590                     *Wmin = min(*Wmin, s->du[j]*Wfac);
1591                     *Wmax = max(*Wmax, s->du[j]*Wfac);
1592                 }
1593             }
1594             else
1595             {
1596                 int    hd = 0; /* determine the histogram direction: */
1597                 double dx;
1598                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1599                 {
1600                     hd = 1;
1601                 }
1602                 dx = s->hist->dx[hd];
1603
1604                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1605                 {
1606                     *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1607                     *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1608                     /* look for the highest value bin with values */
1609                     if (s->hist->bin[hd][j] > 0)
1610                     {
1611                         *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1612                         *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1613                         break;
1614                     }
1615                 }
1616             }
1617         }
1618     }
1619 }
1620
1621 /* Initialize a sim_data structure */
1622 static void sim_data_init(sim_data_t *sd)
1623 {
1624     /* make linked list */
1625     sd->lb       = &(sd->lb_head);
1626     sd->lb->next = sd->lb;
1627     sd->lb->prev = sd->lb;
1628
1629     lambda_components_init(&(sd->lc));
1630 }
1631
1632
1633 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1634 {
1635     int    i;
1636     double sum;
1637
1638     sum = 0;
1639
1640     for (i = 0; i < n; i++)
1641     {
1642         sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1643     }
1644
1645     return sum;
1646 }
1647
1648 /* calculate the BAR average given a histogram
1649
1650     if type== 0, calculate the best estimate for the average,
1651     if type==-1, calculate the minimum possible value given the histogram
1652     if type== 1, calculate the maximum possible value given the histogram */
1653 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1654                                 int type)
1655 {
1656     double sum = 0.;
1657     int    i;
1658     int    max;
1659     /* normalization factor multiplied with bin width and
1660        number of samples (we normalize through M): */
1661     double normdx = 1.;
1662     int    hd     = 0; /* determine the histogram direction: */
1663     double dx;
1664
1665     if ( (hist->nhist > 1) && (Wfac < 0) )
1666     {
1667         hd = 1;
1668     }
1669     dx  = hist->dx[hd];
1670     max = hist->nbin[hd]-1;
1671     if (type == 1)
1672     {
1673         max = hist->nbin[hd]; /* we also add whatever was out of range */
1674     }
1675
1676     for (i = 0; i < max; i++)
1677     {
1678         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1679         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1680
1681         sum += pxdx/(1. + exp(x + sbMmDG));
1682     }
1683
1684     return sum;
1685 }
1686
1687 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1688                                 double temp, double tol, int type)
1689 {
1690     double kT, beta, M;
1691     double DG;
1692     int    i, j;
1693     double Wfac1, Wfac2, Wmin, Wmax;
1694     double DG0, DG1, DG2, dDG1;
1695     double sum1, sum2;
1696     double n1, n2; /* numbers of samples as doubles */
1697
1698     kT   = BOLTZ*temp;
1699     beta = 1/kT;
1700
1701     /* count the numbers of samples */
1702     n1 = ca->ntot;
1703     n2 = cb->ntot;
1704
1705     M = log(n1/n2);
1706
1707     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1708     if (ca->foreign_lambda->dhdl < 0)
1709     {
1710         /* this is the case when the delta U were calculated directly
1711            (i.e. we're not scaling dhdl) */
1712         Wfac1 = beta;
1713         Wfac2 = beta;
1714     }
1715     else
1716     {
1717         /* we're using dhdl, so delta_lambda needs to be a
1718            multiplication factor.  */
1719         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1720         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1721                                                   ca->native_lambda);
1722         if (cb->native_lambda->lc->N > 1)
1723         {
1724             gmx_fatal(FARGS,
1725                       "Can't (yet) do multi-component dhdl interpolation");
1726         }
1727
1728         Wfac1 =  beta*delta_lambda;
1729         Wfac2 = -beta*delta_lambda;
1730     }
1731
1732     if (beta < 1)
1733     {
1734         /* We print the output both in kT and kJ/mol.
1735          * Here we determine DG in kT, so when beta < 1
1736          * the precision has to be increased.
1737          */
1738         tol *= beta;
1739     }
1740
1741     /* Calculate minimum and maximum work to give an initial estimate of
1742      * delta G  as their average.
1743      */
1744     {
1745         double Wmin1, Wmin2, Wmax1, Wmax2;
1746         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1747         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1748
1749         Wmin = min(Wmin1, Wmin2);
1750         Wmax = max(Wmax1, Wmax2);
1751     }
1752
1753     DG0 = Wmin;
1754     DG2 = Wmax;
1755
1756     if (debug)
1757     {
1758         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1759     }
1760     /* We approximate by bisection: given our initial estimates
1761        we keep checking whether the halfway point is greater or
1762        smaller than what we get out of the BAR averages.
1763
1764        For the comparison we can use twice the tolerance. */
1765     while (DG2 - DG0 > 2*tol)
1766     {
1767         DG1 = 0.5*(DG0 + DG2);
1768
1769         /* calculate the BAR averages */
1770         dDG1 = 0.;
1771
1772         for (i = 0; i < ca->nsamples; i++)
1773         {
1774             samples_t      *s = ca->s[i];
1775             sample_range_t *r = &(ca->r[i]);
1776             if (r->use)
1777             {
1778                 if (s->hist)
1779                 {
1780                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1781                 }
1782                 else
1783                 {
1784                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1785                                          Wfac1, (M-DG1));
1786                 }
1787             }
1788         }
1789         for (i = 0; i < cb->nsamples; i++)
1790         {
1791             samples_t      *s = cb->s[i];
1792             sample_range_t *r = &(cb->r[i]);
1793             if (r->use)
1794             {
1795                 if (s->hist)
1796                 {
1797                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1798                 }
1799                 else
1800                 {
1801                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1802                                          Wfac2, -(M-DG1));
1803                 }
1804             }
1805         }
1806
1807         if (dDG1 < 0)
1808         {
1809             DG0 = DG1;
1810         }
1811         else
1812         {
1813             DG2 = DG1;
1814         }
1815         if (debug)
1816         {
1817             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1818         }
1819     }
1820
1821     return 0.5*(DG0 + DG2);
1822 }
1823
1824 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1825                              double temp, double dg, double *sa, double *sb)
1826 {
1827     int    i, j;
1828     double W_ab = 0.;
1829     double W_ba = 0.;
1830     double kT, beta;
1831     double Wfac1, Wfac2;
1832     double n1, n2;
1833
1834     kT   = BOLTZ*temp;
1835     beta = 1/kT;
1836
1837     /* count the numbers of samples */
1838     n1 = ca->ntot;
1839     n2 = cb->ntot;
1840
1841     /* to ensure the work values are the same as during the delta_G */
1842     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1843     if (ca->foreign_lambda->dhdl < 0)
1844     {
1845         /* this is the case when the delta U were calculated directly
1846            (i.e. we're not scaling dhdl) */
1847         Wfac1 = beta;
1848         Wfac2 = beta;
1849     }
1850     else
1851     {
1852         /* we're using dhdl, so delta_lambda needs to be a
1853            multiplication factor.  */
1854         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1855                                                   ca->native_lambda);
1856         Wfac1 =  beta*delta_lambda;
1857         Wfac2 = -beta*delta_lambda;
1858     }
1859
1860     /* first calculate the average work in both directions */
1861     for (i = 0; i < ca->nsamples; i++)
1862     {
1863         samples_t      *s = ca->s[i];
1864         sample_range_t *r = &(ca->r[i]);
1865         if (r->use)
1866         {
1867             if (!s->hist)
1868             {
1869                 for (j = r->start; j < r->end; j++)
1870                 {
1871                     W_ab += Wfac1*s->du[j];
1872                 }
1873             }
1874             else
1875             {
1876                 /* normalization factor multiplied with bin width and
1877                    number of samples (we normalize through M): */
1878                 double normdx = 1.;
1879                 double dx;
1880                 int    hd = 0; /* histogram direction */
1881                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1882                 {
1883                     hd = 1;
1884                 }
1885                 dx = s->hist->dx[hd];
1886
1887                 for (j = 0; j < s->hist->nbin[0]; j++)
1888                 {
1889                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1890                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1891                     W_ab += pxdx*x;
1892                 }
1893             }
1894         }
1895     }
1896     W_ab /= n1;
1897
1898     for (i = 0; i < cb->nsamples; i++)
1899     {
1900         samples_t      *s = cb->s[i];
1901         sample_range_t *r = &(cb->r[i]);
1902         if (r->use)
1903         {
1904             if (!s->hist)
1905             {
1906                 for (j = r->start; j < r->end; j++)
1907                 {
1908                     W_ba += Wfac1*s->du[j];
1909                 }
1910             }
1911             else
1912             {
1913                 /* normalization factor multiplied with bin width and
1914                    number of samples (we normalize through M): */
1915                 double normdx = 1.;
1916                 double dx;
1917                 int    hd = 0; /* histogram direction */
1918                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1919                 {
1920                     hd = 1;
1921                 }
1922                 dx = s->hist->dx[hd];
1923
1924                 for (j = 0; j < s->hist->nbin[0]; j++)
1925                 {
1926                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1927                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1928                     W_ba += pxdx*x;
1929                 }
1930             }
1931         }
1932     }
1933     W_ba /= n2;
1934
1935     /* then calculate the relative entropies */
1936     *sa = (W_ab - dg);
1937     *sb = (W_ba + dg);
1938 }
1939
1940 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1941                            double temp, double dg, double *stddev)
1942 {
1943     int    i, j;
1944     double M;
1945     double sigmafact = 0.;
1946     double kT, beta;
1947     double Wfac1, Wfac2;
1948     double n1, n2;
1949
1950     kT   = BOLTZ*temp;
1951     beta = 1/kT;
1952
1953     /* count the numbers of samples */
1954     n1 = ca->ntot;
1955     n2 = cb->ntot;
1956
1957     /* to ensure the work values are the same as during the delta_G */
1958     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1959     if (ca->foreign_lambda->dhdl < 0)
1960     {
1961         /* this is the case when the delta U were calculated directly
1962            (i.e. we're not scaling dhdl) */
1963         Wfac1 = beta;
1964         Wfac2 = beta;
1965     }
1966     else
1967     {
1968         /* we're using dhdl, so delta_lambda needs to be a
1969            multiplication factor.  */
1970         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1971                                                   ca->native_lambda);
1972         Wfac1 =  beta*delta_lambda;
1973         Wfac2 = -beta*delta_lambda;
1974     }
1975
1976     M = log(n1/n2);
1977
1978
1979     /* calculate average in both directions */
1980     for (i = 0; i < ca->nsamples; i++)
1981     {
1982         samples_t      *s = ca->s[i];
1983         sample_range_t *r = &(ca->r[i]);
1984         if (r->use)
1985         {
1986             if (!s->hist)
1987             {
1988                 for (j = r->start; j < r->end; j++)
1989                 {
1990                     sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1991                 }
1992             }
1993             else
1994             {
1995                 /* normalization factor multiplied with bin width and
1996                    number of samples (we normalize through M): */
1997                 double normdx = 1.;
1998                 double dx;
1999                 int    hd = 0; /* histogram direction */
2000                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
2001                 {
2002                     hd = 1;
2003                 }
2004                 dx = s->hist->dx[hd];
2005
2006                 for (j = 0; j < s->hist->nbin[0]; j++)
2007                 {
2008                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2009                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2010
2011                     sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
2012                 }
2013             }
2014         }
2015     }
2016     for (i = 0; i < cb->nsamples; i++)
2017     {
2018         samples_t      *s = cb->s[i];
2019         sample_range_t *r = &(cb->r[i]);
2020         if (r->use)
2021         {
2022             if (!s->hist)
2023             {
2024                 for (j = r->start; j < r->end; j++)
2025                 {
2026                     sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2027                 }
2028             }
2029             else
2030             {
2031                 /* normalization factor multiplied with bin width and
2032                    number of samples (we normalize through M): */
2033                 double normdx = 1.;
2034                 double dx;
2035                 int    hd = 0; /* histogram direction */
2036                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2037                 {
2038                     hd = 1;
2039                 }
2040                 dx = s->hist->dx[hd];
2041
2042                 for (j = 0; j < s->hist->nbin[0]; j++)
2043                 {
2044                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2045                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2046
2047                     sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2048                 }
2049             }
2050         }
2051     }
2052
2053     sigmafact /= (n1 + n2);
2054
2055
2056     /* Eq. 10 from
2057        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2058     *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2059 }
2060
2061
2062
2063 static void calc_bar(barres_t *br, double tol,
2064                      int npee_min, int npee_max, gmx_bool *bEE,
2065                      double *partsum)
2066 {
2067     int      npee, p;
2068     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2069                                                         for calculated quantities */
2070     int      nsample1, nsample2;
2071     double   temp = br->a->temp;
2072     int      i, j;
2073     double   dg_min, dg_max;
2074     gmx_bool have_hist = FALSE;
2075
2076     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2077
2078     br->dg_disc_err      = 0.;
2079     br->dg_histrange_err = 0.;
2080
2081     /* check if there are histograms */
2082     for (i = 0; i < br->a->nsamples; i++)
2083     {
2084         if (br->a->r[i].use && br->a->s[i]->hist)
2085         {
2086             have_hist = TRUE;
2087             break;
2088         }
2089     }
2090     if (!have_hist)
2091     {
2092         for (i = 0; i < br->b->nsamples; i++)
2093         {
2094             if (br->b->r[i].use && br->b->s[i]->hist)
2095             {
2096                 have_hist = TRUE;
2097                 break;
2098             }
2099         }
2100     }
2101
2102     /* calculate histogram-specific errors */
2103     if (have_hist)
2104     {
2105         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2106         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2107
2108         if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2109         {
2110             /* the histogram range  error is the biggest of the differences
2111                between the best estimate and the extremes */
2112             br->dg_histrange_err = fabs(dg_max - dg_min);
2113         }
2114         br->dg_disc_err = 0.;
2115         for (i = 0; i < br->a->nsamples; i++)
2116         {
2117             if (br->a->s[i]->hist)
2118             {
2119                 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2120             }
2121         }
2122         for (i = 0; i < br->b->nsamples; i++)
2123         {
2124             if (br->b->s[i]->hist)
2125             {
2126                 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2127             }
2128         }
2129     }
2130     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2131
2132     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2133
2134     dg_sig2     = 0;
2135     sa_sig2     = 0;
2136     sb_sig2     = 0;
2137     stddev_sig2 = 0;
2138
2139     *bEE = TRUE;
2140     {
2141         sample_coll_t ca, cb;
2142
2143         /* initialize the samples */
2144         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2145                          br->a->temp);
2146         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2147                          br->b->temp);
2148
2149         for (npee = npee_min; npee <= npee_max; npee++)
2150         {
2151             double dgs      = 0;
2152             double dgs2     = 0;
2153             double dsa      = 0;
2154             double dsb      = 0;
2155             double dsa2     = 0;
2156             double dsb2     = 0;
2157             double dstddev  = 0;
2158             double dstddev2 = 0;
2159
2160
2161             for (p = 0; p < npee; p++)
2162             {
2163                 double   dgp;
2164                 double   stddevc;
2165                 double   sac, sbc;
2166                 gmx_bool cac, cbc;
2167
2168                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2169                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2170
2171                 if (!cac || !cbc)
2172                 {
2173                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2174                     *bEE = FALSE;
2175                     if (cac)
2176                     {
2177                         sample_coll_destroy(&ca);
2178                     }
2179                     if (cbc)
2180                     {
2181                         sample_coll_destroy(&cb);
2182                     }
2183                     return;
2184                 }
2185
2186                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2187                 dgs  += dgp;
2188                 dgs2 += dgp*dgp;
2189
2190                 partsum[npee*(npee_max+1)+p] += dgp;
2191
2192                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2193                 dsa  += sac;
2194                 dsa2 += sac*sac;
2195                 dsb  += sbc;
2196                 dsb2 += sbc*sbc;
2197                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2198
2199                 dstddev  += stddevc;
2200                 dstddev2 += stddevc*stddevc;
2201
2202                 sample_coll_destroy(&ca);
2203                 sample_coll_destroy(&cb);
2204             }
2205             dgs     /= npee;
2206             dgs2    /= npee;
2207             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2208
2209             dsa     /= npee;
2210             dsa2    /= npee;
2211             dsb     /= npee;
2212             dsb2    /= npee;
2213             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2214             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2215
2216             dstddev     /= npee;
2217             dstddev2    /= npee;
2218             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2219         }
2220         br->dg_err        = sqrt(dg_sig2/(npee_max - npee_min + 1));
2221         br->sa_err        = sqrt(sa_sig2/(npee_max - npee_min + 1));
2222         br->sb_err        = sqrt(sb_sig2/(npee_max - npee_min + 1));
2223         br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2224     }
2225 }
2226
2227
2228 static double bar_err(int nbmin, int nbmax, const double *partsum)
2229 {
2230     int    nb, b;
2231     double svar, s, s2, dg;
2232
2233     svar = 0;
2234     for (nb = nbmin; nb <= nbmax; nb++)
2235     {
2236         s  = 0;
2237         s2 = 0;
2238         for (b = 0; b < nb; b++)
2239         {
2240             dg  = partsum[nb*(nbmax+1)+b];
2241             s  += dg;
2242             s2 += dg*dg;
2243         }
2244         s    /= nb;
2245         s2   /= nb;
2246         svar += (s2 - s*s)/(nb - 1);
2247     }
2248
2249     return sqrt(svar/(nbmax + 1 - nbmin));
2250 }
2251
2252
2253 /* Seek the end of an identifier (consecutive non-spaces), followed by
2254    an optional number of spaces or '='-signs. Returns a pointer to the
2255    first non-space value found after that. Returns NULL if the string
2256    ends before that.
2257  */
2258 static const char *find_value(const char *str)
2259 {
2260     gmx_bool name_end_found = FALSE;
2261
2262     /* if the string is a NULL pointer, return a NULL pointer. */
2263     if (str == NULL)
2264     {
2265         return NULL;
2266     }
2267     while (*str != '\0')
2268     {
2269         /* first find the end of the name */
2270         if (!name_end_found)
2271         {
2272             if (isspace(*str) || (*str == '=') )
2273             {
2274                 name_end_found = TRUE;
2275             }
2276         }
2277         else
2278         {
2279             if (!( isspace(*str) || (*str == '=') ))
2280             {
2281                 return str;
2282             }
2283         }
2284         str++;
2285     }
2286     return NULL;
2287 }
2288
2289
2290
2291 /* read a vector-notation description of a lambda vector */
2292 static gmx_bool read_lambda_compvec(const char                *str,
2293                                     lambda_vec_t              *lv,
2294                                     const lambda_components_t *lc_in,
2295                                     lambda_components_t       *lc_out,
2296                                     const char               **end,
2297                                     const char                *fn)
2298 {
2299     gmx_bool    initialize_lc = FALSE; /* whether to initialize the lambda
2300                                           components, or to check them */
2301     gmx_bool    start_reached = FALSE; /* whether the start of component names
2302                                           has been reached */
2303     gmx_bool    vector        = FALSE; /* whether there are multiple components */
2304     int         n             = 0;     /* current component number */
2305     const char *val_start     = NULL;  /* start of the component name, or NULL
2306                                           if not in a value */
2307     char       *strtod_end;
2308     gmx_bool    OK = TRUE;
2309
2310     if (end)
2311     {
2312         *end = str;
2313     }
2314
2315
2316     if (lc_out && lc_out->N == 0)
2317     {
2318         initialize_lc = TRUE;
2319     }
2320
2321     if (lc_in == NULL)
2322     {
2323         lc_in = lc_out;
2324     }
2325
2326     while (1)
2327     {
2328         if (!start_reached)
2329         {
2330             if (isalnum(*str))
2331             {
2332                 vector        = FALSE;
2333                 start_reached = TRUE;
2334                 val_start     = str;
2335             }
2336             else if (*str == '(')
2337             {
2338                 vector        = TRUE;
2339                 start_reached = TRUE;
2340             }
2341             else if (!isspace(*str))
2342             {
2343                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2344             }
2345         }
2346         else
2347         {
2348             if (val_start)
2349             {
2350                 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2351                 {
2352                     /* end of value */
2353                     if (lv == NULL)
2354                     {
2355                         if (initialize_lc)
2356                         {
2357                             lambda_components_add(lc_out, val_start,
2358                                                   (str-val_start));
2359                         }
2360                         else
2361                         {
2362                             if (!lambda_components_check(lc_out, n, val_start,
2363                                                          (str-val_start)))
2364                             {
2365                                 return FALSE;
2366                             }
2367                         }
2368                     }
2369                     else
2370                     {
2371                         /* add a vector component to lv */
2372                         lv->val[n] = strtod(val_start, &strtod_end);
2373                         if (val_start == strtod_end)
2374                         {
2375                             gmx_fatal(FARGS,
2376                                       "Error reading lambda vector in %s", fn);
2377                         }
2378                     }
2379                     /* reset for the next identifier */
2380                     val_start = NULL;
2381                     n++;
2382                     if (!vector)
2383                     {
2384                         return OK;
2385                     }
2386                 }
2387             }
2388             else if (isalnum(*str))
2389             {
2390                 val_start = str;
2391             }
2392             if (*str == ')')
2393             {
2394                 str++;
2395                 if (end)
2396                 {
2397                     *end = str;
2398                 }
2399                 if (!vector)
2400                 {
2401                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2402                 }
2403                 else
2404                 {
2405                     if (n == lc_in->N)
2406                     {
2407                         return OK;
2408                     }
2409                     else if (lv == NULL)
2410                     {
2411                         return FALSE;
2412                     }
2413                     else
2414                     {
2415                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2416                                   fn);
2417                         return FALSE;
2418                     }
2419
2420                 }
2421             }
2422         }
2423         if (*str == '\0')
2424         {
2425             break;
2426         }
2427         str++;
2428         if (end)
2429         {
2430             *end = str;
2431         }
2432     }
2433     if (vector)
2434     {
2435         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2436         return FALSE;
2437     }
2438     return OK;
2439 }
2440
2441 /* read and check the component names from a string */
2442 static gmx_bool read_lambda_components(const char          *str,
2443                                        lambda_components_t *lc,
2444                                        const char         **end,
2445                                        const char          *fn)
2446 {
2447     return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2448 }
2449
2450 /* read an initialized lambda vector from a string */
2451 static gmx_bool read_lambda_vector(const char   *str,
2452                                    lambda_vec_t *lv,
2453                                    const char  **end,
2454                                    const char   *fn)
2455 {
2456     return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2457 }
2458
2459
2460
2461 /* deduce lambda value from legend.
2462     fn = the file name
2463     legend = the legend string
2464     ba = the xvg data
2465     lam = the initialized lambda vector
2466    returns whether to use the data in this set.
2467  */
2468 static gmx_bool legend2lambda(const char   *fn,
2469                               const char   *legend,
2470                               lambda_vec_t *lam)
2471 {
2472     double        lambda = 0;
2473     const char   *ptr    = NULL, *ptr2 = NULL;
2474     gmx_bool      ok     = FALSE;
2475     gmx_bool      bdhdl  = FALSE;
2476     const char   *tostr  = " to ";
2477
2478     if (legend == NULL)
2479     {
2480         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2481     }
2482
2483     /* look for the last 'to': */
2484     ptr2 = legend;
2485     do
2486     {
2487         ptr2 = strstr(ptr2, tostr);
2488         if (ptr2 != NULL)
2489         {
2490             ptr = ptr2;
2491             ptr2++;
2492         }
2493     }
2494     while (ptr2 != NULL && *ptr2 != '\0');
2495
2496     if (ptr)
2497     {
2498         ptr += strlen(tostr)-1; /* and advance past that 'to' */
2499     }
2500     else
2501     {
2502         /* look for the = sign */
2503         ptr = strrchr(legend, '=');
2504         if (!ptr)
2505         {
2506             /* otherwise look for the last space */
2507             ptr = strrchr(legend, ' ');
2508         }
2509     }
2510
2511     if (strstr(legend, "dH"))
2512     {
2513         ok    = TRUE;
2514         bdhdl = TRUE;
2515     }
2516     else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2517     {
2518         ok    = TRUE;
2519         bdhdl = FALSE;
2520     }
2521     else /*if (strstr(legend, "pV"))*/
2522     {
2523         return FALSE;
2524     }
2525     if (!ptr)
2526     {
2527         ok = FALSE;
2528     }
2529
2530     if (!ok)
2531     {
2532         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2533     }
2534     if (!bdhdl)
2535     {
2536         ptr = find_value(ptr);
2537         if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2538         {
2539             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2540         }
2541     }
2542     else
2543     {
2544         int         dhdl_index;
2545         const char *end;
2546         char        buf[STRLEN];
2547
2548         ptr = strrchr(legend, '=');
2549         end = ptr;
2550         if (ptr)
2551         {
2552             /* there must be a component name */
2553             ptr--;
2554             if (ptr < legend)
2555             {
2556                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2557             }
2558             /* now backtrack to the start of the identifier */
2559             while (isspace(*ptr))
2560             {
2561                 end = ptr;
2562                 ptr--;
2563                 if (ptr < legend)
2564                 {
2565                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2566                 }
2567             }
2568             while (!isspace(*ptr))
2569             {
2570                 ptr--;
2571                 if (ptr < legend)
2572                 {
2573                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2574                 }
2575             }
2576             ptr++;
2577             strncpy(buf, ptr, (end-ptr));
2578             buf[(end-ptr)] = '\0';
2579             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2580             if (dhdl_index < 0)
2581             {
2582                 char buf[STRLEN];
2583                 strncpy(buf, ptr, (end-ptr));
2584                 buf[(end-ptr)] = '\0';
2585                 gmx_fatal(FARGS,
2586                           "Did not find lambda component for '%s' in %s",
2587                           buf, fn);
2588             }
2589         }
2590         else
2591         {
2592             if (lam->lc->N > 1)
2593             {
2594                 gmx_fatal(FARGS,
2595                           "dhdl without component name with >1 lambda component in %s",
2596                           fn);
2597             }
2598             dhdl_index = 0;
2599         }
2600         lam->dhdl = dhdl_index;
2601     }
2602     return TRUE;
2603 }
2604
2605 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2606                                 lambda_components_t *lc)
2607 {
2608     gmx_bool    bFound;
2609     const char *ptr;
2610     char       *end;
2611     double      native_lambda;
2612
2613     bFound = FALSE;
2614
2615     /* first check for a state string */
2616     ptr = strstr(subtitle, "state");
2617     if (ptr)
2618     {
2619         int         index = -1;
2620         const char *val_end;
2621
2622         /* the new 4.6 style lambda vectors */
2623         ptr = find_value(ptr);
2624         if (ptr)
2625         {
2626             index = strtol(ptr, &end, 10);
2627             if (ptr == end)
2628             {
2629                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2630                 return FALSE;
2631             }
2632             ptr = end;
2633         }
2634         else
2635         {
2636             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2637             return FALSE;
2638         }
2639         /* now find the lambda vector component names */
2640         while (*ptr != '(' && !isalnum(*ptr))
2641         {
2642             ptr++;
2643             if (*ptr == '\0')
2644             {
2645                 gmx_fatal(FARGS,
2646                           "Incomplete lambda vector component data in %s", fn);
2647                 return FALSE;
2648             }
2649         }
2650         val_end = ptr;
2651         if (!read_lambda_components(ptr, lc, &val_end, fn))
2652         {
2653             gmx_fatal(FARGS,
2654                       "lambda vector components in %s don't match those previously read",
2655                       fn);
2656         }
2657         ptr = find_value(val_end);
2658         if (!ptr)
2659         {
2660             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2661             return FALSE;
2662         }
2663         lambda_vec_init(&(ba->native_lambda), lc);
2664         if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2665         {
2666             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2667         }
2668         ba->native_lambda.index = index;
2669         bFound                  = TRUE;
2670     }
2671     else
2672     {
2673         /* compatibility mode: check for lambda in other ways. */
2674         /* plain text lambda string */
2675         ptr = strstr(subtitle, "lambda");
2676         if (ptr == NULL)
2677         {
2678             /* xmgrace formatted lambda string */
2679             ptr = strstr(subtitle, "\\xl\\f{}");
2680         }
2681         if (ptr == NULL)
2682         {
2683             /* xmgr formatted lambda string */
2684             ptr = strstr(subtitle, "\\8l\\4");
2685         }
2686         if (ptr != NULL)
2687         {
2688             ptr = strstr(ptr, "=");
2689         }
2690         if (ptr != NULL)
2691         {
2692             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2693             /* add the lambda component name as an empty string */
2694             if (lc->N > 0)
2695             {
2696                 if (!lambda_components_check(lc, 0, "", 0))
2697                 {
2698                     gmx_fatal(FARGS,
2699                               "lambda vector components in %s don't match those previously read",
2700                               fn);
2701                 }
2702             }
2703             else
2704             {
2705                 lambda_components_add(lc, "", 0);
2706             }
2707             lambda_vec_init(&(ba->native_lambda), lc);
2708             ba->native_lambda.val[0] = native_lambda;
2709         }
2710     }
2711
2712     return bFound;
2713 }
2714
2715 static void filename2lambda(const char *fn)
2716 {
2717     double        lambda;
2718     const char   *ptr, *digitptr;
2719     char         *endptr;
2720     int           dirsep;
2721     ptr = fn;
2722     /* go to the end of the path string and search backward to find the last
2723        directory in the path which has to contain the value of lambda
2724      */
2725     while (ptr[1] != '\0')
2726     {
2727         ptr++;
2728     }
2729     /* searching backward to find the second directory separator */
2730     dirsep   = 0;
2731     digitptr = NULL;
2732     while (ptr >= fn)
2733     {
2734         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2735         {
2736             if (dirsep == 1)
2737             {
2738                 break;
2739             }
2740             dirsep++;
2741         }
2742         /* save the last position of a digit between the last two
2743            separators = in the last dirname */
2744         if (dirsep > 0 && isdigit(*ptr))
2745         {
2746             digitptr = ptr;
2747         }
2748         ptr--;
2749     }
2750     if (!digitptr)
2751     {
2752         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2753                   " last directory in the path '%s' does not contain a number", fn);
2754     }
2755     if (digitptr[-1] == '-')
2756     {
2757         digitptr--;
2758     }
2759     lambda = strtod(digitptr, &endptr);
2760     if (endptr == digitptr)
2761     {
2762         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2763     }
2764 }
2765
2766 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2767                                   lambda_components_t *lc)
2768 {
2769     int          i;
2770     char        *subtitle, **legend, *ptr;
2771     int          np;
2772     gmx_bool     native_lambda_read = FALSE;
2773     char         buf[STRLEN];
2774     lambda_vec_t lv;
2775
2776     xvg_init(ba);
2777
2778     ba->filename = fn;
2779
2780     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2781     if (!ba->y)
2782     {
2783         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2784     }
2785     /* Reorder the data */
2786     ba->t  = ba->y[0];
2787     for (i = 1; i < ba->nset; i++)
2788     {
2789         ba->y[i-1] = ba->y[i];
2790     }
2791     ba->nset--;
2792
2793     snew(ba->np, ba->nset);
2794     for (i = 0; i < ba->nset; i++)
2795     {
2796         ba->np[i] = np;
2797     }
2798
2799     ba->temp = -1;
2800     if (subtitle != NULL)
2801     {
2802         /* try to extract temperature */
2803         ptr = strstr(subtitle, "T =");
2804         if (ptr != NULL)
2805         {
2806             ptr += 3;
2807             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2808             {
2809                 if (ba->temp <= 0)
2810                 {
2811                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2812                               ba->temp, fn);
2813                 }
2814             }
2815         }
2816     }
2817     if (ba->temp < 0)
2818     {
2819         if (*temp <= 0)
2820         {
2821             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2822         }
2823         ba->temp = *temp;
2824     }
2825
2826     /* Try to deduce lambda from the subtitle */
2827     if (subtitle)
2828     {
2829         if (subtitle2lambda(subtitle, ba, fn, lc))
2830         {
2831             native_lambda_read = TRUE;
2832         }
2833     }
2834     snew(ba->lambda, ba->nset);
2835     if (legend == NULL)
2836     {
2837         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2838         if (ba->nset == 1)
2839         {
2840             if (!native_lambda_read)
2841             {
2842                 /* Deduce lambda from the file name */
2843                 filename2lambda(fn);
2844                 native_lambda_read = TRUE;
2845             }
2846             ba->lambda[0] = ba->native_lambda;
2847         }
2848         else
2849         {
2850             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2851         }
2852     }
2853     else
2854     {
2855         for (i = 0; i < ba->nset; )
2856         {
2857             gmx_bool use = FALSE;
2858             /* Read lambda from the legend */
2859             lambda_vec_init( &(ba->lambda[i]), lc );
2860             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2861             use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2862             if (use)
2863             {
2864                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2865                 i++;
2866             }
2867             else
2868             {
2869                 int j;
2870                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2871                 for (j = i+1; j < ba->nset; j++)
2872                 {
2873                     ba->y[j-1]  = ba->y[j];
2874                     legend[j-1] = legend[j];
2875                 }
2876                 ba->nset--;
2877             }
2878         }
2879     }
2880
2881     if (!native_lambda_read)
2882     {
2883         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2884     }
2885
2886     if (legend != NULL)
2887     {
2888         for (i = 0; i < ba->nset-1; i++)
2889         {
2890             sfree(legend[i]);
2891         }
2892         sfree(legend);
2893     }
2894 }
2895
2896 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2897 {
2898     xvg_t     *barsim;
2899     samples_t *s;
2900     int        i;
2901     double    *lambda;
2902
2903     snew(barsim, 1);
2904
2905     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2906
2907     if (barsim->nset < 1)
2908     {
2909         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2910     }
2911
2912     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2913     {
2914         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2915     }
2916     *temp = barsim->temp;
2917
2918     /* now create a series of samples_t */
2919     snew(s, barsim->nset);
2920     for (i = 0; i < barsim->nset; i++)
2921     {
2922         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2923                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2924                                                    &(barsim->lambda[i])),
2925                      fn);
2926         s[i].du  = barsim->y[i];
2927         s[i].ndu = barsim->np[i];
2928         s[i].t   = barsim->t;
2929
2930         lambda_data_list_insert_sample(sd->lb, s+i);
2931     }
2932     {
2933         char buf[STRLEN];
2934
2935         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2936         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2937                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2938         for (i = 0; i < barsim->nset; i++)
2939         {
2940             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2941             printf("        %s (%d pts)\n", buf, s[i].ndu);
2942         }
2943     }
2944     printf("\n\n");
2945 }
2946
2947 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2948                                  double start_time, double delta_time,
2949                                  lambda_vec_t *native_lambda, double temp,
2950                                  double *last_t, const char *filename)
2951 {
2952     int           i, j;
2953     gmx_bool      allocated;
2954     double        old_foreign_lambda;
2955     lambda_vec_t *foreign_lambda;
2956     int           type;
2957     samples_t    *s; /* convenience pointer */
2958     int           startj;
2959
2960     /* check the block types etc. */
2961     if ( (blk->nsub < 3) ||
2962          (blk->sub[0].type != xdr_datatype_int) ||
2963          (blk->sub[1].type != xdr_datatype_double) ||
2964          (
2965              (blk->sub[2].type != xdr_datatype_float) &&
2966              (blk->sub[2].type != xdr_datatype_double)
2967          ) ||
2968          (blk->sub[0].nr < 1) ||
2969          (blk->sub[1].nr < 1) )
2970     {
2971         gmx_fatal(FARGS,
2972                   "Unexpected/corrupted block data in file %s around time %f.",
2973                   filename, start_time);
2974     }
2975
2976     snew(foreign_lambda, 1);
2977     lambda_vec_init(foreign_lambda, native_lambda->lc);
2978     lambda_vec_copy(foreign_lambda, native_lambda);
2979     type = blk->sub[0].ival[0];
2980     if (type == dhbtDH)
2981     {
2982         for (i = 0; i < native_lambda->lc->N; i++)
2983         {
2984             foreign_lambda->val[i] = blk->sub[1].dval[i];
2985         }
2986     }
2987     else
2988     {
2989         if (blk->sub[0].nr > 1)
2990         {
2991             foreign_lambda->dhdl = blk->sub[0].ival[1];
2992         }
2993         else
2994         {
2995             foreign_lambda->dhdl = 0;
2996         }
2997     }
2998
2999     if (!*smp)
3000     {
3001         /* initialize the samples structure if it's empty. */
3002         snew(*smp, 1);
3003         samples_init(*smp, native_lambda, foreign_lambda, temp,
3004                      type == dhbtDHDL, filename);
3005         (*smp)->start_time = start_time;
3006         (*smp)->delta_time = delta_time;
3007     }
3008
3009     /* set convenience pointer */
3010     s = *smp;
3011
3012     /* now double check */
3013     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
3014     {
3015         char buf[STRLEN], buf2[STRLEN];
3016         lambda_vec_print(foreign_lambda, buf, FALSE);
3017         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
3018         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
3019         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
3020                   filename, start_time);
3021     }
3022
3023     /* make room for the data */
3024     if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3025     {
3026         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3027             blk->sub[2].nr*2 : s->ndu_alloc;
3028         srenew(s->du_alloc, s->ndu_alloc);
3029         s->du = s->du_alloc;
3030     }
3031     startj   = s->ndu;
3032     s->ndu  += blk->sub[2].nr;
3033     s->ntot += blk->sub[2].nr;
3034     *ndu     = blk->sub[2].nr;
3035
3036     /* and copy the data*/
3037     for (j = 0; j < blk->sub[2].nr; j++)
3038     {
3039         if (blk->sub[2].type == xdr_datatype_float)
3040         {
3041             s->du[startj+j] = blk->sub[2].fval[j];
3042         }
3043         else
3044         {
3045             s->du[startj+j] = blk->sub[2].dval[j];
3046         }
3047     }
3048     if (start_time + blk->sub[2].nr*delta_time > *last_t)
3049     {
3050         *last_t = start_time + blk->sub[2].nr*delta_time;
3051     }
3052 }
3053
3054 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3055                                       double start_time, double delta_time,
3056                                       lambda_vec_t *native_lambda, double temp,
3057                                       double *last_t, const char *filename)
3058 {
3059     int           i, j;
3060     samples_t    *s;
3061     int           nhist;
3062     double        old_foreign_lambda;
3063     lambda_vec_t *foreign_lambda;
3064     int           type;
3065     int           nbins[2];
3066
3067     /* check the block types etc. */
3068     if ( (blk->nsub < 2) ||
3069          (blk->sub[0].type != xdr_datatype_double) ||
3070          (blk->sub[1].type != xdr_datatype_int64) ||
3071          (blk->sub[0].nr < 2)  ||
3072          (blk->sub[1].nr < 2) )
3073     {
3074         gmx_fatal(FARGS,
3075                   "Unexpected/corrupted block data in file %s around time %f",
3076                   filename, start_time);
3077     }
3078
3079     nhist = blk->nsub-2;
3080     if (nhist == 0)
3081     {
3082         return NULL;
3083     }
3084     if (nhist > 2)
3085     {
3086         gmx_fatal(FARGS,
3087                   "Unexpected/corrupted block data in file %s around time %f",
3088                   filename, start_time);
3089     }
3090
3091     snew(s, 1);
3092     *nsamples = 1;
3093
3094     snew(foreign_lambda, 1);
3095     lambda_vec_init(foreign_lambda, native_lambda->lc);
3096     lambda_vec_copy(foreign_lambda, native_lambda);
3097     type = (int)(blk->sub[1].lval[1]);
3098     if (type == dhbtDH)
3099     {
3100         double old_foreign_lambda;
3101
3102         old_foreign_lambda = blk->sub[0].dval[0];
3103         if (old_foreign_lambda >= 0)
3104         {
3105             foreign_lambda->val[0] = old_foreign_lambda;
3106             if (foreign_lambda->lc->N > 1)
3107             {
3108                 gmx_fatal(FARGS,
3109                           "Single-component lambda in multi-component file %s",
3110                           filename);
3111             }
3112         }
3113         else
3114         {
3115             for (i = 0; i < native_lambda->lc->N; i++)
3116             {
3117                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3118             }
3119         }
3120     }
3121     else
3122     {
3123         if (foreign_lambda->lc->N > 1)
3124         {
3125             if (blk->sub[1].nr < 3 + nhist)
3126             {
3127                 gmx_fatal(FARGS,
3128                           "Missing derivative coord in multi-component file %s",
3129                           filename);
3130             }
3131             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3132         }
3133         else
3134         {
3135             foreign_lambda->dhdl = 0;
3136         }
3137     }
3138
3139     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3140                  filename);
3141     snew(s->hist, 1);
3142
3143     for (i = 0; i < nhist; i++)
3144     {
3145         nbins[i] = blk->sub[i+2].nr;
3146     }
3147
3148     hist_init(s->hist, nhist, nbins);
3149
3150     for (i = 0; i < nhist; i++)
3151     {
3152         s->hist->x0[i] = blk->sub[1].lval[2+i];
3153         s->hist->dx[i] = blk->sub[0].dval[1];
3154         if (i == 1)
3155         {
3156             s->hist->dx[i] = -s->hist->dx[i];
3157         }
3158     }
3159
3160     s->hist->start_time = start_time;
3161     s->hist->delta_time = delta_time;
3162     s->start_time       = start_time;
3163     s->delta_time       = delta_time;
3164
3165     for (i = 0; i < nhist; i++)
3166     {
3167         int             nbin;
3168         gmx_int64_t     sum = 0;
3169
3170         for (j = 0; j < s->hist->nbin[i]; j++)
3171         {
3172             int binv = (int)(blk->sub[i+2].ival[j]);
3173
3174             s->hist->bin[i][j] = binv;
3175             sum               += binv;
3176
3177         }
3178         if (i == 0)
3179         {
3180             s->ntot      = sum;
3181             s->hist->sum = sum;
3182         }
3183         else
3184         {
3185             if (s->ntot != sum)
3186             {
3187                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3188                           filename);
3189             }
3190         }
3191     }
3192
3193     if (start_time + s->hist->sum*delta_time > *last_t)
3194     {
3195         *last_t = start_time + s->hist->sum*delta_time;
3196     }
3197     return s;
3198 }
3199
3200
3201 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3202 {
3203     int            i, j;
3204     ener_file_t    fp;
3205     t_enxframe    *fr;
3206     int            nre;
3207     gmx_enxnm_t   *enm           = NULL;
3208     double         first_t       = -1;
3209     double         last_t        = -1;
3210     samples_t    **samples_rawdh = NULL; /* contains samples for raw delta_h  */
3211     int           *nhists        = NULL; /* array to keep count & print at end */
3212     int           *npts          = NULL; /* array to keep count & print at end */
3213     lambda_vec_t **lambdas       = NULL; /* array to keep count & print at end */
3214     lambda_vec_t  *native_lambda;
3215     double         end_time;             /* the end time of the last batch of samples */
3216     int            nsamples = 0;
3217     lambda_vec_t   start_lambda;
3218
3219     fp = open_enx(fn, "r");
3220     do_enxnms(fp, &nre, &enm);
3221     snew(fr, 1);
3222
3223     snew(native_lambda, 1);
3224     start_lambda.lc = NULL;
3225
3226     while (do_enx(fp, fr))
3227     {
3228         /* count the data blocks */
3229         int    nblocks_raw  = 0;
3230         int    nblocks_hist = 0;
3231         int    nlam         = 0;
3232         int    k;
3233         /* DHCOLL block information: */
3234         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3235         double rtemp      = 0;
3236
3237         /* count the blocks and handle collection information: */
3238         for (i = 0; i < fr->nblock; i++)
3239         {
3240             if (fr->block[i].id == enxDHHIST)
3241             {
3242                 nblocks_hist++;
3243             }
3244             if (fr->block[i].id == enxDH)
3245             {
3246                 nblocks_raw++;
3247             }
3248             if (fr->block[i].id == enxDHCOLL)
3249             {
3250                 nlam++;
3251                 if ( (fr->block[i].nsub < 1) ||
3252                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3253                      (fr->block[i].sub[0].nr < 5))
3254                 {
3255                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3256                 }
3257
3258                 /* read the data from the DHCOLL block */
3259                 rtemp            =        fr->block[i].sub[0].dval[0];
3260                 start_time       =   fr->block[i].sub[0].dval[1];
3261                 delta_time       =   fr->block[i].sub[0].dval[2];
3262                 old_start_lambda = fr->block[i].sub[0].dval[3];
3263                 delta_lambda     = fr->block[i].sub[0].dval[4];
3264
3265                 if (delta_lambda > 0)
3266                 {
3267                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3268                 }
3269                 if ( ( *temp != rtemp) && (*temp > 0) )
3270                 {
3271                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3272                 }
3273                 *temp = rtemp;
3274
3275                 if (old_start_lambda >= 0)
3276                 {
3277                     if (sd->lc.N > 0)
3278                     {
3279                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3280                         {
3281                             gmx_fatal(FARGS,
3282                                       "lambda vector components in %s don't match those previously read",
3283                                       fn);
3284                         }
3285                     }
3286                     else
3287                     {
3288                         lambda_components_add(&(sd->lc), "", 0);
3289                     }
3290                     if (!start_lambda.lc)
3291                     {
3292                         lambda_vec_init(&start_lambda, &(sd->lc));
3293                     }
3294                     start_lambda.val[0] = old_start_lambda;
3295                 }
3296                 else
3297                 {
3298                     /* read lambda vector */
3299                     int      n_lambda_vec;
3300                     gmx_bool check = (sd->lc.N > 0);
3301                     if (fr->block[i].nsub < 2)
3302                     {
3303                         gmx_fatal(FARGS,
3304                                   "No lambda vector, but start_lambda=%f\n",
3305                                   old_start_lambda);
3306                     }
3307                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3308                     for (j = 0; j < n_lambda_vec; j++)
3309                     {
3310                         const char *name =
3311                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3312                         if (check)
3313                         {
3314                             /* check the components */
3315                             lambda_components_check(&(sd->lc), j, name,
3316                                                     strlen(name));
3317                         }
3318                         else
3319                         {
3320                             lambda_components_add(&(sd->lc), name,
3321                                                   strlen(name));
3322                         }
3323                     }
3324                     lambda_vec_init(&start_lambda, &(sd->lc));
3325                     start_lambda.index = fr->block[i].sub[1].ival[0];
3326                     for (j = 0; j < n_lambda_vec; j++)
3327                     {
3328                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3329                     }
3330                 }
3331                 if (first_t < 0)
3332                 {
3333                     first_t = start_time;
3334                 }
3335             }
3336         }
3337
3338         if (nlam != 1)
3339         {
3340             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3341         }
3342         if (nblocks_raw > 0 && nblocks_hist > 0)
3343         {
3344             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3345         }
3346
3347         if (nsamples > 0)
3348         {
3349             /* check the native lambda */
3350             if (!lambda_vec_same(&start_lambda, native_lambda) )
3351             {
3352                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3353                           fn, native_lambda, start_lambda, start_time);
3354             }
3355             /* check the number of samples against the previous number */
3356             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3357             {
3358                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3359                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3360             }
3361             /* check whether last iterations's end time matches with
3362                the currrent start time */
3363             if ( (fabs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3364             {
3365                 /* it didn't. We need to store our samples and reallocate */
3366                 for (i = 0; i < nsamples; i++)
3367                 {
3368                     if (samples_rawdh[i])
3369                     {
3370                         /* insert it into the existing list */
3371                         lambda_data_list_insert_sample(sd->lb,
3372                                                        samples_rawdh[i]);
3373                         /* and make sure we'll allocate a new one this time
3374                            around */
3375                         samples_rawdh[i] = NULL;
3376                     }
3377                 }
3378             }
3379         }
3380         else
3381         {
3382             /* this is the first round; allocate the associated data
3383                structures */
3384             /*native_lambda=start_lambda;*/
3385             lambda_vec_init(native_lambda, &(sd->lc));
3386             lambda_vec_copy(native_lambda, &start_lambda);
3387             nsamples = nblocks_raw+nblocks_hist;
3388             snew(nhists, nsamples);
3389             snew(npts, nsamples);
3390             snew(lambdas, nsamples);
3391             snew(samples_rawdh, nsamples);
3392             for (i = 0; i < nsamples; i++)
3393             {
3394                 nhists[i]        = 0;
3395                 npts[i]          = 0;
3396                 lambdas[i]       = NULL;
3397                 samples_rawdh[i] = NULL; /* init to NULL so we know which
3398                                             ones contain values */
3399             }
3400         }
3401
3402         /* and read them */
3403         k = 0; /* counter for the lambdas, etc. arrays */
3404         for (i = 0; i < fr->nblock; i++)
3405         {
3406             if (fr->block[i].id == enxDH)
3407             {
3408                 int type = (fr->block[i].sub[0].ival[0]);
3409                 if (type == dhbtDH || type == dhbtDHDL)
3410                 {
3411                     int ndu;
3412                     read_edr_rawdh_block(&(samples_rawdh[k]),
3413                                          &ndu,
3414                                          &(fr->block[i]),
3415                                          start_time, delta_time,
3416                                          native_lambda, rtemp,
3417                                          &last_t, fn);
3418                     npts[k] += ndu;
3419                     if (samples_rawdh[k])
3420                     {
3421                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3422                     }
3423                     k++;
3424                 }
3425             }
3426             else if (fr->block[i].id == enxDHHIST)
3427             {
3428                 int type = (int)(fr->block[i].sub[1].lval[1]);
3429                 if (type == dhbtDH || type == dhbtDHDL)
3430                 {
3431                     int        j;
3432                     int        nb = 0;
3433                     samples_t *s; /* this is where the data will go */
3434                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3435                                             start_time, delta_time,
3436                                             native_lambda, rtemp,
3437                                             &last_t, fn);
3438                     nhists[k] += nb;
3439                     if (nb > 0)
3440                     {
3441                         lambdas[k] = s->foreign_lambda;
3442                     }
3443                     k++;
3444                     /* and insert the new sample immediately */
3445                     for (j = 0; j < nb; j++)
3446                     {
3447                         lambda_data_list_insert_sample(sd->lb, s+j);
3448                     }
3449                 }
3450             }
3451         }
3452     }
3453     /* Now store all our extant sample collections */
3454     for (i = 0; i < nsamples; i++)
3455     {
3456         if (samples_rawdh[i])
3457         {
3458             /* insert it into the existing list */
3459             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3460         }
3461     }
3462
3463
3464     {
3465         char buf[STRLEN];
3466         printf("\n");
3467         lambda_vec_print(native_lambda, buf, FALSE);
3468         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3469                fn, first_t, last_t, buf);
3470         for (i = 0; i < nsamples; i++)
3471         {
3472             if (lambdas[i])
3473             {
3474                 lambda_vec_print(lambdas[i], buf, TRUE);
3475                 if (nhists[i] > 0)
3476                 {
3477                     printf("        %s (%d hists)\n", buf, nhists[i]);
3478                 }
3479                 else
3480                 {
3481                     printf("        %s (%d pts)\n", buf, npts[i]);
3482                 }
3483             }
3484         }
3485     }
3486     printf("\n\n");
3487     sfree(npts);
3488     sfree(nhists);
3489     sfree(lambdas);
3490 }
3491
3492
3493 int gmx_bar(int argc, char *argv[])
3494 {
3495     static const char *desc[] = {
3496         "[THISMODULE] calculates free energy difference estimates through ",
3497         "Bennett's acceptance ratio method (BAR). It also automatically",
3498         "adds series of individual free energies obtained with BAR into",
3499         "a combined free energy estimate.[PAR]",
3500
3501         "Every individual BAR free energy difference relies on two ",
3502         "simulations at different states: say state A and state B, as",
3503         "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3504         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3505         "average of the Hamiltonian difference of state B given state A and",
3506         "vice versa.",
3507         "The energy differences to the other state must be calculated",
3508         "explicitly during the simulation. This can be done with",
3509         "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3510
3511         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3512         "Two types of input files are supported:[BR]",
3513         "[TT]*[tt]  Files with more than one [IT]y[it]-value. ",
3514         "The files should have columns ",
3515         "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3516         "The [GRK]lambda[grk] values are inferred ",
3517         "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3518         "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3519         "legends of Delta H",
3520         "[BR]",
3521         "[TT]*[tt]  Files with only one [IT]y[it]-value. Using the",
3522         "[TT]-extp[tt] option for these files, it is assumed",
3523         "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3524         "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3525         "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3526         "subtitle (if present), otherwise from a number in the subdirectory ",
3527         "in the file name.[PAR]",
3528
3529         "The [GRK]lambda[grk] of the simulation is parsed from ",
3530         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3531         "foreign [GRK]lambda[grk] values from the legend containing the ",
3532         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3533         "the legend line containing 'T ='.[PAR]",
3534
3535         "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3536         "These can contain either lists of energy differences (see the ",
3537         "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3538         "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3539         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3540         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3541
3542         "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3543         "the energy difference can also be extrapolated from the ",
3544         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3545         "option, which assumes that the system's Hamiltonian depends linearly",
3546         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3547
3548         "The free energy estimates are determined using BAR with bisection, ",
3549         "with the precision of the output set with [TT]-prec[tt]. ",
3550         "An error estimate taking into account time correlations ",
3551         "is made by splitting the data into blocks and determining ",
3552         "the free energy differences over those blocks and assuming ",
3553         "the blocks are independent. ",
3554         "The final error estimate is determined from the average variance ",
3555         "over 5 blocks. A range of block numbers for error estimation can ",
3556         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3557
3558         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3559         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3560         "samples. [BB]Note[bb] that when aggregating energy ",
3561         "differences/derivatives with different sampling intervals, this is ",
3562         "almost certainly not correct. Usually subsequent energies are ",
3563         "correlated and different time intervals mean different degrees ",
3564         "of correlation between samples.[PAR]",
3565
3566         "The results are split in two parts: the last part contains the final ",
3567         "results in kJ/mol, together with the error estimate for each part ",
3568         "and the total. The first part contains detailed free energy ",
3569         "difference estimates and phase space overlap measures in units of ",
3570         "kT (together with their computed error estimate). The printed ",
3571         "values are:[BR]",
3572         "[TT]*[tt]  lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3573         "[TT]*[tt]  lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3574         "[TT]*[tt]     DG: the free energy estimate.[BR]",
3575         "[TT]*[tt]    s_A: an estimate of the relative entropy of B in A.[BR]",
3576         "[TT]*[tt]    s_B: an estimate of the relative entropy of A in B.[BR]",
3577         "[TT]*[tt]  stdev: an estimate expected per-sample standard deviation.[PAR]",
3578
3579         "The relative entropy of both states in each other's ensemble can be ",
3580         "interpreted as a measure of phase space overlap: ",
3581         "the relative entropy s_A of the work samples of lambda_B in the ",
3582         "ensemble of lambda_A (and vice versa for s_B), is a ",
3583         "measure of the 'distance' between Boltzmann distributions of ",
3584         "the two states, that goes to zero for identical distributions. See ",
3585         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3586         "[PAR]",
3587         "The estimate of the expected per-sample standard deviation, as given ",
3588         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3589         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3590         "of the actual statistical error, because it assumes independent samples).[PAR]",
3591
3592         "To get a visual estimate of the phase space overlap, use the ",
3593         "[TT]-oh[tt] option to write series of histograms, together with the ",
3594         "[TT]-nbin[tt] option.[PAR]"
3595     };
3596     static real        begin    = 0, end = -1, temp = -1;
3597     int                nd       = 2, nbmin = 5, nbmax = 5;
3598     int                nbin     = 100;
3599     gmx_bool           use_dhdl = FALSE;
3600     gmx_bool           calc_s, calc_v;
3601     t_pargs            pa[] = {
3602         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3603         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3604         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3605         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3606         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3607         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3608         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3609         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3610     };
3611
3612     t_filenm           fnm[] = {
3613         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3614         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3615         { efXVG, "-o",  "bar",    ffOPTWR },
3616         { efXVG, "-oi", "barint", ffOPTWR },
3617         { efXVG, "-oh", "histogram", ffOPTWR }
3618     };
3619 #define NFILE asize(fnm)
3620
3621     int          f, i, j;
3622     int          nf = 0;    /* file counter */
3623     int          nbs;
3624     int          nfile_tot; /* total number of input files */
3625     int          nxvgfile = 0;
3626     int          nedrfile = 0;
3627     char       **fxvgnms;
3628     char       **fedrnms;
3629     sim_data_t   sim_data; /* the simulation data */
3630     barres_t    *results;  /* the results */
3631     int          nresults; /* number of results in results array */
3632
3633     double      *partsum;
3634     double       prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3635     FILE        *fpb, *fpi;
3636     char         dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3637     char         buf[STRLEN], buf2[STRLEN];
3638     char         ktformat[STRLEN], sktformat[STRLEN];
3639     char         kteformat[STRLEN], skteformat[STRLEN];
3640     output_env_t oenv;
3641     double       kT, beta;
3642     gmx_bool     result_OK = TRUE, bEE = TRUE;
3643
3644     gmx_bool     disc_err          = FALSE;
3645     double       sum_disc_err      = 0.; /* discretization error */
3646     gmx_bool     histrange_err     = FALSE;
3647     double       sum_histrange_err = 0.; /* histogram range error */
3648     double       stat_err          = 0.; /* statistical error */
3649
3650     if (!parse_common_args(&argc, argv,
3651                            PCA_CAN_VIEW,
3652                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3653     {
3654         return 0;
3655     }
3656
3657     if (opt2bSet("-f", NFILE, fnm))
3658     {
3659         nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3660     }
3661     if (opt2bSet("-g", NFILE, fnm))
3662     {
3663         nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3664     }
3665
3666     sim_data_init(&sim_data);
3667 #if 0
3668     /* make linked list */
3669     lb = &lambda_head;
3670     lambda_data_init(lb, 0, 0);
3671     lb->next = lb;
3672     lb->prev = lb;
3673 #endif
3674
3675
3676     nfile_tot = nxvgfile + nedrfile;
3677
3678     if (nfile_tot == 0)
3679     {
3680         gmx_fatal(FARGS, "No input files!");
3681     }
3682
3683     if (nd < 0)
3684     {
3685         gmx_fatal(FARGS, "Can not have negative number of digits");
3686     }
3687     prec = pow(10, -nd);
3688
3689     snew(partsum, (nbmax+1)*(nbmax+1));
3690     nf = 0;
3691
3692     /* read in all files. First xvg files */
3693     for (f = 0; f < nxvgfile; f++)
3694     {
3695         read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3696         nf++;
3697     }
3698     /* then .edr files */
3699     for (f = 0; f < nedrfile; f++)
3700     {
3701         read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3702         nf++;
3703     }
3704
3705     /* fix the times to allow for equilibration */
3706     sim_data_impose_times(&sim_data, begin, end);
3707
3708     if (opt2bSet("-oh", NFILE, fnm))
3709     {
3710         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3711     }
3712
3713     /* assemble the output structures from the lambdas */
3714     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3715
3716     sum_disc_err = barres_list_max_disc_err(results, nresults);
3717
3718     if (nresults == 0)
3719     {
3720         printf("\nNo results to calculate.\n");
3721         return 0;
3722     }
3723
3724     if (sum_disc_err > prec)
3725     {
3726         prec = sum_disc_err;
3727         nd   = ceil(-log10(prec));
3728         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3729     }
3730
3731
3732     /*sprintf(lamformat,"%%6.3f");*/
3733     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3734     /* the format strings of the results in kT */
3735     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3736     sprintf( sktformat, "%%%ds", 6+nd);
3737     /* the format strings of the errors in kT */
3738     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3739     sprintf( skteformat, "%%%ds", 4+nd);
3740     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3741     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3742
3743
3744
3745     fpb = NULL;
3746     if (opt2bSet("-o", NFILE, fnm))
3747     {
3748         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3749         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3750                             "\\lambda", buf, exvggtXYDY, oenv);
3751     }
3752
3753     fpi = NULL;
3754     if (opt2bSet("-oi", NFILE, fnm))
3755     {
3756         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3757         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3758                        "\\lambda", buf, oenv);
3759     }
3760
3761
3762
3763     if (nbmin > nbmax)
3764     {
3765         nbmin = nbmax;
3766     }
3767
3768     /* first calculate results */
3769     bEE      = TRUE;
3770     disc_err = FALSE;
3771     for (f = 0; f < nresults; f++)
3772     {
3773         /* Determine the free energy difference with a factor of 10
3774          * more accuracy than requested for printing.
3775          */
3776         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3777                  &bEE, partsum);
3778
3779         if (results[f].dg_disc_err > prec/10.)
3780         {
3781             disc_err = TRUE;
3782         }
3783         if (results[f].dg_histrange_err > prec/10.)
3784         {
3785             histrange_err = TRUE;
3786         }
3787     }
3788
3789     /* print results in kT */
3790     kT   = BOLTZ*temp;
3791     beta = 1/kT;
3792
3793     printf("\nTemperature: %g K\n", temp);
3794
3795     printf("\nDetailed results in kT (see help for explanation):\n\n");
3796     printf("%6s ", " lam_A");
3797     printf("%6s ", " lam_B");
3798     printf(sktformat,  "DG ");
3799     if (bEE)
3800     {
3801         printf(skteformat, "+/- ");
3802     }
3803     if (disc_err)
3804     {
3805         printf(skteformat, "disc ");
3806     }
3807     if (histrange_err)
3808     {
3809         printf(skteformat, "range ");
3810     }
3811     printf(sktformat,  "s_A ");
3812     if (bEE)
3813     {
3814         printf(skteformat, "+/- " );
3815     }
3816     printf(sktformat,  "s_B ");
3817     if (bEE)
3818     {
3819         printf(skteformat, "+/- " );
3820     }
3821     printf(sktformat,  "stdev ");
3822     if (bEE)
3823     {
3824         printf(skteformat, "+/- ");
3825     }
3826     printf("\n");
3827     for (f = 0; f < nresults; f++)
3828     {
3829         lambda_vec_print_short(results[f].a->native_lambda, buf);
3830         printf("%s ", buf);
3831         lambda_vec_print_short(results[f].b->native_lambda, buf);
3832         printf("%s ", buf);
3833         printf(ktformat,  results[f].dg);
3834         printf(" ");
3835         if (bEE)
3836         {
3837             printf(kteformat, results[f].dg_err);
3838             printf(" ");
3839         }
3840         if (disc_err)
3841         {
3842             printf(kteformat, results[f].dg_disc_err);
3843             printf(" ");
3844         }
3845         if (histrange_err)
3846         {
3847             printf(kteformat, results[f].dg_histrange_err);
3848             printf(" ");
3849         }
3850         printf(ktformat,  results[f].sa);
3851         printf(" ");
3852         if (bEE)
3853         {
3854             printf(kteformat, results[f].sa_err);
3855             printf(" ");
3856         }
3857         printf(ktformat,  results[f].sb);
3858         printf(" ");
3859         if (bEE)
3860         {
3861             printf(kteformat, results[f].sb_err);
3862             printf(" ");
3863         }
3864         printf(ktformat,  results[f].dg_stddev);
3865         printf(" ");
3866         if (bEE)
3867         {
3868             printf(kteformat, results[f].dg_stddev_err);
3869         }
3870         printf("\n");
3871
3872         /* Check for negative relative entropy with a 95% certainty. */
3873         if (results[f].sa < -2*results[f].sa_err ||
3874             results[f].sb < -2*results[f].sb_err)
3875         {
3876             result_OK = FALSE;
3877         }
3878     }
3879
3880     if (!result_OK)
3881     {
3882         printf("\nWARNING: Some of these results violate the Second Law of "
3883                "Thermodynamics: \n"
3884                "         This is can be the result of severe undersampling, or "
3885                "(more likely)\n"
3886                "         there is something wrong with the simulations.\n");
3887     }
3888
3889
3890     /* final results in kJ/mol */
3891     printf("\n\nFinal results in kJ/mol:\n\n");
3892     dg_tot  = 0;
3893     for (f = 0; f < nresults; f++)
3894     {
3895
3896         if (fpi != NULL)
3897         {
3898             lambda_vec_print_short(results[f].a->native_lambda, buf);
3899             fprintf(fpi, xvg2format, buf, dg_tot);
3900         }
3901
3902
3903         if (fpb != NULL)
3904         {
3905             lambda_vec_print_intermediate(results[f].a->native_lambda,
3906                                           results[f].b->native_lambda,
3907                                           buf);
3908
3909             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3910         }
3911
3912         printf("point ");
3913         lambda_vec_print_short(results[f].a->native_lambda, buf);
3914         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3915         printf("%s - %s", buf, buf2);
3916         printf(",   DG ");
3917
3918         printf(dgformat, results[f].dg*kT);
3919         if (bEE)
3920         {
3921             printf(" +/- ");
3922             printf(dgformat, results[f].dg_err*kT);
3923         }
3924         if (histrange_err)
3925         {
3926             printf(" (max. range err. = ");
3927             printf(dgformat, results[f].dg_histrange_err*kT);
3928             printf(")");
3929             sum_histrange_err += results[f].dg_histrange_err*kT;
3930         }
3931
3932         printf("\n");
3933         dg_tot += results[f].dg;
3934     }
3935     printf("\n");
3936     printf("total ");
3937     lambda_vec_print_short(results[0].a->native_lambda, buf);
3938     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3939     printf("%s - %s", buf, buf2);
3940     printf(",   DG ");
3941
3942     printf(dgformat, dg_tot*kT);
3943     if (bEE)
3944     {
3945         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3946         printf(" +/- ");
3947         printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3948     }
3949     printf("\n");
3950     if (disc_err)
3951     {
3952         printf("\nmaximum discretization error = ");
3953         printf(dgformat, sum_disc_err);
3954         if (bEE && stat_err < sum_disc_err)
3955         {
3956             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3957         }
3958     }
3959     if (histrange_err)
3960     {
3961         printf("\nmaximum histogram range error = ");
3962         printf(dgformat, sum_histrange_err);
3963         if (bEE && stat_err < sum_histrange_err)
3964         {
3965             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3966         }
3967
3968     }
3969     printf("\n");
3970
3971
3972     if (fpi != NULL)
3973     {
3974         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3975         fprintf(fpi, xvg2format, buf, dg_tot);
3976         gmx_ffclose(fpi);
3977     }
3978
3979     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3980     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3981
3982     return 0;
3983 }