src/gromacs/gmxana/gmx_bar.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include <cctype>
  38 #include <cmath>
  39 #include <cstdlib>
  40 #include <cstring>
  41
  42 #include <algorithm>
  43 #include <limits>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "gromacs/commandline/viewit.h"
  47 #include "gromacs/fileio/enxio.h"
  48 #include "gromacs/fileio/xvgr.h"
  49 #include "gromacs/gmxana/gmx_ana.h"
  50 #include "gromacs/math/units.h"
  51 #include "gromacs/math/utilities.h"
  52 #include "gromacs/mdlib/energyoutput.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/trajectory/energyframe.h"
  55 #include "gromacs/utility/arraysize.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/dir_separator.h"
  58 #include "gromacs/utility/fatalerror.h"
  59 #include "gromacs/utility/gmxassert.h"
  60 #include "gromacs/utility/smalloc.h"
  61 #include "gromacs/utility/snprintf.h"
  62
  63
  64 /* Structure for the names of lambda vector components */
  65 typedef struct lambda_components_t
  66 {
  67     char **names;  /* Array of strings with names for the lambda vector
  68                       components */
  69     int    N;      /* The number of components */
  70     int    Nalloc; /* The number of allocated components */
  71 } lambda_components_t;
  72
  73 /* Structure for a lambda vector or a dhdl derivative direction */
  74 typedef struct lambda_vec_t
  75 {
  76     double                    *val;   /* The lambda vector component values. Only valid if
  77                                          dhdl == -1 */
  78     int                        dhdl;  /* The coordinate index for the derivative described by this
  79                                          structure, or -1 */
  80     const lambda_components_t *lc;    /* the associated lambda_components
  81                                          structure */
  82     int                        index; /* The state number (init-lambda-state) of this lambda
  83                                          vector, if known. If not, it is set to -1 */
  84 } lambda_vec_t;
  85
  86 /* the dhdl.xvg data from a simulation */
  87 typedef struct xvg_t
  88 {
  89     const char   *filename;
  90     int           ftp;           /* file type */
  91     int           nset;          /* number of lambdas, including dhdl */
  92     int          *np;            /* number of data points (du or hists) per lambda */
  93     int           np_alloc;      /* number of points (du or hists) allocated */
  94     double        temp;          /* temperature */
  95     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  96     double       *t;             /* the times (of second index for y) */
  97     double      **y;             /* the dU values. y[0] holds the derivative, while
  98                                     further ones contain the energy differences between
  99                                     the native lambda and the 'foreign' lambdas. */
 100     lambda_vec_t  native_lambda; /* the native lambda */
 101
 102     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 103 } xvg_t;
 104
 105
 106 typedef struct hist_t
 107 {
 108     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 109     double          dx[2];                  /* the histogram spacing. The reverse
 110                                                dx is the negative of the forward dx.*/
 111     int64_t         x0[2];                  /* the (forward + reverse) histogram start
 112                                                        point(s) as int */
 113
 114     int             nbin[2];                /* the (forward+reverse) number of bins */
 115     int64_t         sum;                    /* the total number of counts. Must be
 116                                                        the same for forward + reverse.  */
 117     int             nhist;                  /* number of hist datas (forward or reverse) */
 118
 119     double          start_time, delta_time; /* start time, end time of histogram */
 120 } hist_t;
 121
 122
 123 /* an aggregate of samples for partial free energy calculation */
 124 typedef struct samples_t
 125 {
 126     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 127     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 128     double        temp;           /* the temperature */
 129     gmx_bool      derivative;     /* whether this sample is a derivative */
 130
 131     /* The samples come either as either delta U lists: */
 132     int     ndu;                    /* the number of delta U samples */
 133     double *du;                     /* the delta u's */
 134     double *t;                      /* the times associated with those samples, or: */
 135     double  start_time, delta_time; /*start time and delta time for linear time*/
 136
 137     /* or as histograms: */
 138     hist_t *hist; /* a histogram */
 139
 140     /* allocation data: (not NULL for data 'owned' by this struct) */
 141     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 142     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 143     hist_t         *hist_alloc;          /* allocated hist */
 144
 145     int64_t         ntot;                /* total number of samples */
 146     const char     *filename;            /* the file name this sample comes from */
 147 } samples_t;
 148
 149 /* a sample range (start to end for du-style data, or boolean
 150     for both du-style data and histograms */
 151 typedef struct sample_range_t
 152 {
 153     int        start, end; /* start and end index for du style data */
 154     gmx_bool   use;        /* whether to use this sample */
 155
 156     samples_t *s;          /* the samples this range belongs to */
 157 } sample_range_t;
 158
 159
 160 /* a collection of samples for a partial free energy calculation
 161     (i.e. the collection of samples from one native lambda to one
 162     foreign lambda) */
 163 typedef struct sample_coll_t
 164 {
 165     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 166                                           in the histogram */
 167     lambda_vec_t   *foreign_lambda;    /* collection */
 168     double          temp;              /* the temperature */
 169
 170     int             nsamples;          /* the number of samples */
 171     samples_t     **s;                 /* the samples themselves */
 172     sample_range_t *r;                 /* the sample ranges */
 173     int             nsamples_alloc;    /* number of allocated samples */
 174
 175     int64_t         ntot;              /* total number of samples in the ranges of
 176                                                   this collection */
 177
 178     struct sample_coll_t *next, *prev; /* next and previous in the list */
 179 } sample_coll_t;
 180
 181 /* all the samples associated with a lambda point */
 182 typedef struct lambda_data_t
 183 {
 184     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 185     double                temp;        /* temperature */
 186
 187     sample_coll_t        *sc;          /* the samples */
 188
 189     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 190
 191     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 192 } lambda_data_t;
 193
 194 /* Top-level data structure of simulation data */
 195 typedef struct sim_data_t
 196 {
 197     lambda_data_t      *lb;      /* a lambda data linked list */
 198     lambda_data_t       lb_head; /* The head element of the linked list */
 199
 200     lambda_components_t lc;      /* the allowed components of the lambda
 201                                     vectors */
 202 } sim_data_t;
 203
 204 /* Top-level data structure with calculated values. */
 205 typedef struct {
 206     sample_coll_t *a, *b;            /* the simulation data */
 207
 208     double         dg;               /* the free energy difference */
 209     double         dg_err;           /* the free energy difference */
 210
 211     double         dg_disc_err;      /* discretization error */
 212     double         dg_histrange_err; /* histogram range error */
 213
 214     double         sa;               /* relative entropy of b in state a */
 215     double         sa_err;           /* error in sa */
 216     double         sb;               /* relative entropy of a in state b */
 217     double         sb_err;           /* error in sb */
 218
 219     double         dg_stddev;        /* expected dg stddev per sample */
 220     double         dg_stddev_err;    /* error in dg_stddev */
 221 } barres_t;
 222
 223
 224 /* Initialize a lambda_components structure */
 225 static void lambda_components_init(lambda_components_t *lc)
 226 {
 227     lc->N      = 0;
 228     lc->Nalloc = 2;
 229     snew(lc->names, lc->Nalloc);
 230 }
 231
 232 /* Add a component to a lambda_components structure */
 233 static void lambda_components_add(lambda_components_t *lc,
 234                                   const char *name, size_t name_length)
 235 {
 236     while (lc->N + 1 > lc->Nalloc)
 237     {
 238         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 239         srenew(lc->names, lc->Nalloc);
 240     }
 241     snew(lc->names[lc->N], name_length+1);
 242     std::strncpy(lc->names[lc->N], name, name_length);
 243     lc->N++;
 244 }
 245
 246 /* check whether a component with index 'index' matches the given name, or
 247    is also NULL. Returns TRUE if this is the case.
 248    the string name does not need to end */
 249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 250                                         int                        index,
 251                                         const char                *name,
 252                                         size_t                     name_length)
 253 {
 254     size_t len;
 255     if (!lc || index >= lc->N)
 256     {
 257         return FALSE;
 258     }
 259     if ((name == nullptr) && (lc->names[index] == nullptr))
 260     {
 261         return TRUE;
 262     }
 263     if (((name != nullptr) && (lc->names[index] == nullptr)) ||
 264         ((name == nullptr) && (lc->names[index] != nullptr)))
 265     {
 266         return FALSE;
 267     }
 268     GMX_RELEASE_ASSERT((name != nullptr) || (name_length == 0),
 269                        "If name is empty, the length of the substring to examine within it must be zero");
 270     len = std::strlen(lc->names[index]);
 271     if (len != name_length)
 272     {
 273         return FALSE;
 274     }
 275     if (name_length == 0)
 276     {
 277         // Everything matches a zero-length substring. This branch is
 278         // needed because name could in principle be nullptr.
 279         return TRUE;
 280     }
 281     return std::strncmp(lc->names[index], name, name_length) == 0;
 282 }
 283
 284 /* Find the index of a given lambda component name, or -1 if not found */
 285 static int lambda_components_find(const lambda_components_t *lc,
 286                                   const char                *name,
 287                                   size_t                     name_length)
 288 {
 289     int i;
 290
 291     for (i = 0; i < lc->N; i++)
 292     {
 293         if (std::strncmp(lc->names[i], name, name_length) == 0)
 294         {
 295             return i;
 296         }
 297     }
 298     return -1;
 299 }
 300
 301
 302
 303 /* initialize a lambda vector */
 304 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 305 {
 306     snew(lv->val, lc->N);
 307     lv->index = -1;
 308     lv->dhdl  = -1;
 309     lv->lc    = lc;
 310 }
 311
 312 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 313 {
 314     int i;
 315
 316     lambda_vec_init(lv, orig->lc);
 317     lv->dhdl  = orig->dhdl;
 318     lv->index = orig->index;
 319     for (i = 0; i < lv->lc->N; i++)
 320     {
 321         lv->val[i] = orig->val[i];
 322     }
 323 }
 324
 325 /* write a lambda vec to a preallocated string */
 326 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 327 {
 328     int    i;
 329
 330     str[0] = 0; /* reset the string */
 331     if (lv->dhdl < 0)
 332     {
 333         if (named)
 334         {
 335             str += sprintf(str, "delta H to ");
 336         }
 337         if (lv->lc->N > 1)
 338         {
 339             str += sprintf(str, "(");
 340         }
 341         for (i = 0; i < lv->lc->N; i++)
 342         {
 343             str += sprintf(str, "%g", lv->val[i]);
 344             if (i < lv->lc->N-1)
 345             {
 346                 str += sprintf(str, ", ");
 347             }
 348         }
 349         if (lv->lc->N > 1)
 350         {
 351             sprintf(str, ")");
 352         }
 353     }
 354     else
 355     {
 356         /* this lambda vector describes a derivative */
 357         str += sprintf(str, "dH/dl");
 358         if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
 359         {
 360             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 361         }
 362     }
 363 }
 364
 365 /* write a shortened version of the lambda vec to a preallocated string */
 366 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 367 {
 368     if (lv->index >= 0)
 369     {
 370         sprintf(str, "%6d", lv->index);
 371     }
 372     else
 373     {
 374         if (lv->dhdl < 0)
 375         {
 376             sprintf(str, "%6.3f", lv->val[0]);
 377         }
 378         else
 379         {
 380             sprintf(str, "dH/dl[%d]", lv->dhdl);
 381         }
 382     }
 383 }
 384
 385 /* write an intermediate version of two lambda vecs to a preallocated string */
 386 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 387                                           const lambda_vec_t *b, char *str)
 388 {
 389     str[0] = 0;
 390     if ( (a->index >= 0) && (b->index >= 0) )
 391     {
 392         sprintf(str, "%6.3f", (a->index+b->index)/2.0);
 393     }
 394     else
 395     {
 396         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 397         {
 398             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
 399         }
 400     }
 401 }
 402
 403
 404 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 405    a and b must describe non-derivative lambda points */
 406 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 407 {
 408     int    i;
 409     double ret = 0.;
 410
 411     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 412     {
 413         gmx_fatal(FARGS,
 414                   "Trying to calculate the difference between derivatives instead of lambda points");
 415     }
 416     if (a->lc != b->lc)
 417     {
 418         gmx_fatal(FARGS,
 419                   "Trying to calculate the difference lambdas with differing basis set");
 420     }
 421     for (i = 0; i < a->lc->N; i++)
 422     {
 423         double df = a->val[i] - b->val[i];
 424         ret += df*df;
 425     }
 426     return std::sqrt(ret);
 427 }
 428
 429
 430 /* check whether two lambda vectors are the same */
 431 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 432 {
 433     int i;
 434
 435     if (a->lc != b->lc)
 436     {
 437         return FALSE;
 438     }
 439     if (a->dhdl < 0)
 440     {
 441         for (i = 0; i < a->lc->N; i++)
 442         {
 443             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 444             {
 445                 return FALSE;
 446             }
 447         }
 448         return TRUE;
 449     }
 450     else
 451     {
 452         /* they're derivatives, so we check whether the indices match */
 453         return (a->dhdl == b->dhdl);
 454     }
 455 }
 456
 457 /* Compare the sort order of two foreign lambda vectors
 458
 459     returns 1 if a is 'bigger' than b,
 460     returns 0 if they're the same,
 461     returns -1 if a is 'smaller' than b.*/
 462 static int lambda_vec_cmp_foreign(const lambda_vec_t *a,
 463                                   const lambda_vec_t *b)
 464 {
 465     int      i;
 466     double   norm_a    = 0, norm_b = 0;
 467     gmx_bool different = FALSE;
 468
 469     if (a->lc != b->lc)
 470     {
 471         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 472     }
 473     /* if either one has an index we sort based on that */
 474     if ((a->index >= 0) || (b->index >= 0))
 475     {
 476         if (a->index == b->index)
 477         {
 478             return 0;
 479         }
 480         return (a->index > b->index) ? 1 : -1;
 481     }
 482     if (a->dhdl >= 0 || b->dhdl >= 0)
 483     {
 484         /* lambda vectors that are derivatives always sort higher than those
 485            without derivatives */
 486         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 487         {
 488             return (a->dhdl >= 0) ? 1 : -1;
 489         }
 490         return 0;
 491     }
 492
 493     /* neither has an index, so we can only sort on the lambda components,
 494        which is only valid if there is one component */
 495     for (i = 0; i < a->lc->N; i++)
 496     {
 497         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 498         {
 499             different = TRUE;
 500         }
 501         norm_a += a->val[i]*a->val[i];
 502         norm_b += b->val[i]*b->val[i];
 503     }
 504     if (!different)
 505     {
 506         return 0;
 507     }
 508     return (norm_a > norm_b) ? 1 : -1;
 509 }
 510
 511 /* Compare the sort order of two native lambda vectors
 512
 513     returns 1 if a is 'bigger' than b,
 514     returns 0 if they're the same,
 515     returns -1 if a is 'smaller' than b.*/
 516 static int lambda_vec_cmp_native(const lambda_vec_t *a,
 517                                  const lambda_vec_t *b)
 518 {
 519     if (a->lc != b->lc)
 520     {
 521         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 522     }
 523     /* if either one has an index we sort based on that */
 524     if ((a->index >= 0) || (b->index >= 0))
 525     {
 526         if (a->index == b->index)
 527         {
 528             return 0;
 529         }
 530         return (a->index > b->index) ? 1 : -1;
 531     }
 532     /* neither has an index, so we can only sort on the lambda components,
 533        which is only valid if there is one component */
 534     if (a->lc->N > 1)
 535     {
 536         gmx_fatal(FARGS,
 537                   "Can't compare lambdas with no index and > 1 component");
 538     }
 539     if (a->dhdl >= 0 || b->dhdl >= 0)
 540     {
 541         gmx_fatal(FARGS,
 542                   "Can't compare native lambdas that are derivatives");
 543     }
 544     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 545     {
 546         return 0;
 547     }
 548     return a->val[0] > b->val[0] ? 1 : -1;
 549 }
 550
 551
 552
 553
 554 static void hist_init(hist_t *h, int nhist, int *nbin)
 555 {
 556     int i;
 557     if (nhist > 2)
 558     {
 559         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 560     }
 561     for (i = 0; i < nhist; i++)
 562     {
 563         snew(h->bin[i], nbin[i]);
 564         h->x0[i]      = 0;
 565         h->nbin[i]    = nbin[i];
 566         h->start_time = h->delta_time = 0;
 567         h->dx[i]      = 0;
 568     }
 569     h->sum   = 0;
 570     h->nhist = nhist;
 571 }
 572
 573 static void xvg_init(xvg_t *ba)
 574 {
 575     ba->filename = nullptr;
 576     ba->nset     = 0;
 577     ba->np_alloc = 0;
 578     ba->np       = nullptr;
 579     ba->y        = nullptr;
 580 }
 581
 582 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 583                          lambda_vec_t *foreign_lambda, double temp,
 584                          gmx_bool derivative, const char *filename)
 585 {
 586     s->native_lambda  = native_lambda;
 587     s->foreign_lambda = foreign_lambda;
 588     s->temp           = temp;
 589     s->derivative     = derivative;
 590
 591     s->ndu        = 0;
 592     s->du         = nullptr;
 593     s->t          = nullptr;
 594     s->start_time = s->delta_time = 0;
 595     s->hist       = nullptr;
 596     s->du_alloc   = nullptr;
 597     s->t_alloc    = nullptr;
 598     s->hist_alloc = nullptr;
 599     s->ndu_alloc  = 0;
 600     s->nt_alloc   = 0;
 601
 602     s->ntot     = 0;
 603     s->filename = filename;
 604 }
 605
 606 static void sample_range_init(sample_range_t *r, samples_t *s)
 607 {
 608     r->start = 0;
 609     r->end   = s->ndu;
 610     r->use   = TRUE;
 611     r->s     = nullptr;
 612 }
 613
 614 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 615                              lambda_vec_t *foreign_lambda, double temp)
 616 {
 617     sc->native_lambda  = native_lambda;
 618     sc->foreign_lambda = foreign_lambda;
 619     sc->temp           = temp;
 620
 621     sc->nsamples       = 0;
 622     sc->s              = nullptr;
 623     sc->r              = nullptr;
 624     sc->nsamples_alloc = 0;
 625
 626     sc->ntot = 0;
 627     sc->next = sc->prev = nullptr;
 628 }
 629
 630 static void sample_coll_destroy(sample_coll_t *sc)
 631 {
 632     /* don't free the samples themselves */
 633     sfree(sc->r);
 634     sfree(sc->s);
 635 }
 636
 637
 638 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 639                              double temp)
 640 {
 641     l->lambda = native_lambda;
 642     l->temp   = temp;
 643
 644     l->next = nullptr;
 645     l->prev = nullptr;
 646
 647     l->sc = &(l->sc_head);
 648
 649     sample_coll_init(l->sc, native_lambda, nullptr, 0.);
 650     l->sc->next = l->sc;
 651     l->sc->prev = l->sc;
 652 }
 653
 654 static void barres_init(barres_t *br)
 655 {
 656     br->dg            = 0;
 657     br->dg_err        = 0;
 658     br->sa            = 0;
 659     br->sa_err        = 0;
 660     br->sb            = 0;
 661     br->sb_err        = 0;
 662     br->dg_stddev     = 0;
 663     br->dg_stddev_err = 0;
 664
 665     br->a = nullptr;
 666     br->b = nullptr;
 667 }
 668
 669
 670 /* calculate the total number of samples in a sample collection */
 671 static void sample_coll_calc_ntot(sample_coll_t *sc)
 672 {
 673     int i;
 674
 675     sc->ntot = 0;
 676     for (i = 0; i < sc->nsamples; i++)
 677     {
 678         if (sc->r[i].use)
 679         {
 680             if (sc->s[i]->hist)
 681             {
 682                 sc->ntot += sc->s[i]->ntot;
 683             }
 684             else
 685             {
 686                 sc->ntot += sc->r[i].end - sc->r[i].start;
 687             }
 688         }
 689     }
 690 }
 691
 692
 693 /* find the barsamples_t associated with a lambda that corresponds to
 694    a specific foreign lambda */
 695 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 696                                                    lambda_vec_t  *foreign_lambda)
 697 {
 698     sample_coll_t *sc = l->sc->next;
 699
 700     while (sc != l->sc)
 701     {
 702         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 703         {
 704             return sc;
 705         }
 706         sc = sc->next;
 707     }
 708
 709     return nullptr;
 710 }
 711
 712 /* insert li into an ordered list of lambda_colls */
 713 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 714 {
 715     sample_coll_t *scn = l->sc->next;
 716     while ( (scn != l->sc) )
 717     {
 718         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 719         {
 720             break;
 721         }
 722         scn = scn->next;
 723     }
 724     /* now insert it before the found scn */
 725     sc->next        = scn;
 726     sc->prev        = scn->prev;
 727     scn->prev->next = sc;
 728     scn->prev       = sc;
 729 }
 730
 731 /* insert li into an ordered list of lambdas */
 732 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 733 {
 734     lambda_data_t *lc = head->next;
 735     while (lc != head)
 736     {
 737         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 738         {
 739             break;
 740         }
 741         lc = lc->next;
 742     }
 743     /* now insert ourselves before the found lc */
 744     li->next       = lc;
 745     li->prev       = lc->prev;
 746     lc->prev->next = li;
 747     lc->prev       = li;
 748 }
 749
 750 /* insert a sample and a sample_range into a sample_coll. The
 751     samples are stored as a pointer, the range is copied. */
 752 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 753                                       sample_range_t *r)
 754 {
 755     /* first check if it belongs here */
 756     GMX_ASSERT(sc->next->s, "Next not properly initialized!");
 757     if (sc->temp != s->temp)
 758     {
 759         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 760                   s->filename, sc->next->s[0]->filename);
 761     }
 762     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 763     {
 764         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 765                   s->filename, sc->next->s[0]->filename);
 766     }
 767     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 768     {
 769         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 770                   s->filename, sc->next->s[0]->filename);
 771     }
 772
 773     /* check if there's room */
 774     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 775     {
 776         sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
 777         srenew(sc->s, sc->nsamples_alloc);
 778         srenew(sc->r, sc->nsamples_alloc);
 779     }
 780     sc->s[sc->nsamples] = s;
 781     sc->r[sc->nsamples] = *r;
 782     sc->nsamples++;
 783
 784     sample_coll_calc_ntot(sc);
 785 }
 786
 787 /* insert a sample into a lambda_list, creating the right sample_coll if
 788    neccesary */
 789 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 790 {
 791     gmx_bool       found = FALSE;
 792     sample_coll_t *sc;
 793     sample_range_t r;
 794
 795     lambda_data_t *l = head->next;
 796
 797     /* first search for the right lambda_data_t */
 798     while (l != head)
 799     {
 800         if (lambda_vec_same(l->lambda, s->native_lambda) )
 801         {
 802             found = TRUE;
 803             break;
 804         }
 805         l = l->next;
 806     }
 807
 808     if (!found)
 809     {
 810         snew(l, 1);                                     /* allocate a new one */
 811         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 812         lambda_data_insert_lambda(head, l);             /* add it to the list */
 813     }
 814
 815     /* now look for a sample collection */
 816     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 817     if (!sc)
 818     {
 819         snew(sc, 1); /* allocate a new one */
 820         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 821         lambda_data_insert_sample_coll(l, sc);
 822     }
 823
 824     /* now insert the samples into the sample coll */
 825     sample_range_init(&r, s);
 826     sample_coll_insert_sample(sc, s, &r);
 827 }
 828
 829
 830 /* make a histogram out of a sample collection */
 831 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 832                                   int *nbin_alloc, int *nbin,
 833                                   double *dx, double *xmin, int nbin_default)
 834 {
 835     int      i, j, k;
 836     gmx_bool dx_set   = FALSE;
 837     gmx_bool xmin_set = FALSE;
 838
 839     gmx_bool xmax_set      = FALSE;
 840     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 841                                        limits of a histogram */
 842     double   xmax          = -1;
 843
 844     /* first determine dx and xmin; try the histograms */
 845     for (i = 0; i < sc->nsamples; i++)
 846     {
 847         if (sc->s[i]->hist)
 848         {
 849             hist_t *hist = sc->s[i]->hist;
 850             for (k = 0; k < hist->nhist; k++)
 851             {
 852                 double hdx      = hist->dx[k];
 853                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 854
 855                 /* we use the biggest dx*/
 856                 if ( (!dx_set) || hist->dx[0] > *dx)
 857                 {
 858                     dx_set = TRUE;
 859                     *dx    = hist->dx[0];
 860                 }
 861                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 862                 {
 863                     xmin_set = TRUE;
 864                     *xmin    = (hist->x0[k]*hdx);
 865                 }
 866
 867                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 868                 {
 869                     xmax_set = TRUE;
 870                     xmax     = xmax_now;
 871                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 872                     {
 873                         xmax_set_hard = TRUE;
 874                     }
 875                 }
 876                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 877                 {
 878                     xmax_set_hard = TRUE;
 879                     xmax          = xmax_now;
 880                 }
 881             }
 882         }
 883     }
 884     /* and the delta us */
 885     for (i = 0; i < sc->nsamples; i++)
 886     {
 887         if (sc->s[i]->ndu > 0)
 888         {
 889             /* determine min and max */
 890             int    starti  = sc->r[i].start;
 891             int    endi    = sc->r[i].end;
 892             double du_xmin = sc->s[i]->du[starti];
 893             double du_xmax = sc->s[i]->du[starti];
 894             for (j = starti+1; j < endi; j++)
 895             {
 896                 if (sc->s[i]->du[j] < du_xmin)
 897                 {
 898                     du_xmin = sc->s[i]->du[j];
 899                 }
 900                 if (sc->s[i]->du[j] > du_xmax)
 901                 {
 902                     du_xmax = sc->s[i]->du[j];
 903                 }
 904             }
 905
 906             /* and now change the limits */
 907             if ( (!xmin_set) || (du_xmin < *xmin) )
 908             {
 909                 xmin_set = TRUE;
 910                 *xmin    = du_xmin;
 911             }
 912             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 913             {
 914                 xmax_set = TRUE;
 915                 xmax     = du_xmax;
 916             }
 917         }
 918     }
 919
 920     if (!xmax_set || !xmin_set)
 921     {
 922         *nbin = 0;
 923         return;
 924     }
 925
 926
 927     if (!dx_set)
 928     {
 929         *nbin = nbin_default;
 930         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 931                                                be 0, and we count from 0 */
 932     }
 933     else
 934     {
 935         *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
 936     }
 937
 938     if (*nbin > *nbin_alloc)
 939     {
 940         *nbin_alloc = *nbin;
 941         srenew(*bin, *nbin_alloc);
 942     }
 943
 944     /* reset the histogram */
 945     for (i = 0; i < (*nbin); i++)
 946     {
 947         (*bin)[i] = 0;
 948     }
 949
 950     /* now add the actual data */
 951     for (i = 0; i < sc->nsamples; i++)
 952     {
 953         if (sc->s[i]->hist)
 954         {
 955             hist_t *hist = sc->s[i]->hist;
 956             for (k = 0; k < hist->nhist; k++)
 957             {
 958                 double hdx       = hist->dx[k];
 959                 double xmin_hist = hist->x0[k]*hdx;
 960                 for (j = 0; j < hist->nbin[k]; j++)
 961                 {
 962                     /* calculate the bin corresponding to the middle of the
 963                        original bin */
 964                     double x     = hdx*(j+0.5) + xmin_hist;
 965                     int    binnr = static_cast<int>((x-(*xmin))/(*dx));
 966
 967                     if (binnr >= *nbin || binnr < 0)
 968                     {
 969                         binnr = (*nbin)-1;
 970                     }
 971
 972                     (*bin)[binnr] += hist->bin[k][j];
 973                 }
 974             }
 975         }
 976         else
 977         {
 978             int starti = sc->r[i].start;
 979             int endi   = sc->r[i].end;
 980             for (j = starti; j < endi; j++)
 981             {
 982                 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
 983                 if (binnr >= *nbin || binnr < 0)
 984                 {
 985                     binnr = (*nbin)-1;
 986                 }
 987
 988                 (*bin)[binnr]++;
 989             }
 990         }
 991     }
 992 }
 993
 994 /* write a collection of histograms to a file */
 995 static void sim_data_histogram(sim_data_t *sd, const char *filename,
 996                                int nbin_default, const gmx_output_env_t *oenv)
 997 {
 998     char           label_x[STRLEN];
 999     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1000     const char    *title   = "N(\\DeltaH)";
1001     const char    *label_y = "Samples";
1002     FILE          *fp;
1003     lambda_data_t *bl;
1004     int            nsets     = 0;
1005     char         **setnames  = nullptr;
1006     gmx_bool       first_set = FALSE;
1007     /* histogram data: */
1008     int           *hist       = nullptr;
1009     int            nbin       = 0;
1010     int            nbin_alloc = 0;
1011     double         dx         = 0;
1012     double         minval     = 0;
1013     int            i;
1014     lambda_data_t *bl_head = sd->lb;
1015
1016     printf("\nWriting histogram to %s\n", filename);
1017     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1018
1019     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1020
1021     /* first get all the set names */
1022     bl = bl_head->next;
1023     /* iterate over all lambdas */
1024     while (bl != bl_head)
1025     {
1026         sample_coll_t *sc = bl->sc->next;
1027
1028         /* iterate over all samples */
1029         while (sc != bl->sc)
1030         {
1031             char buf[STRLEN], buf2[STRLEN];
1032
1033             nsets++;
1034             srenew(setnames, nsets);
1035             snew(setnames[nsets-1], STRLEN);
1036             if (sc->foreign_lambda->dhdl < 0)
1037             {
1038                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1039                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1040                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1041                         deltag, lambda, buf2, lambda, buf);
1042             }
1043             else
1044             {
1045                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1046                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1047                         dhdl, lambda, buf);
1048             }
1049             sc = sc->next;
1050         }
1051
1052         bl = bl->next;
1053     }
1054     xvgr_legend(fp, nsets, setnames, oenv);
1055
1056
1057     /* now make the histograms */
1058     bl = bl_head->next;
1059     /* iterate over all lambdas */
1060     while (bl != bl_head)
1061     {
1062         sample_coll_t *sc = bl->sc->next;
1063
1064         /* iterate over all samples */
1065         while (sc != bl->sc)
1066         {
1067             if (!first_set)
1068             {
1069                 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1070             }
1071
1072             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1073                                   nbin_default);
1074
1075             for (i = 0; i < nbin; i++)
1076             {
1077                 double xmin = i*dx + minval;
1078                 double xmax = (i+1)*dx + minval;
1079
1080                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1081             }
1082
1083             first_set = FALSE;
1084             sc        = sc->next;
1085         }
1086
1087         bl = bl->next;
1088     }
1089
1090     if (hist)
1091     {
1092         sfree(hist);
1093     }
1094
1095     xvgrclose(fp);
1096 }
1097
1098 static int
1099 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1100 {
1101     int n = 0;
1102
1103     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1104     if (lambda->index >= 0)
1105     {
1106         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1107     }
1108     if (lambda->dhdl >= 0)
1109     {
1110         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1111     }
1112     else
1113     {
1114         int i;
1115         for (i = 0; i < lambda->lc->N; i++)
1116         {
1117             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1118         }
1119     }
1120     return n;
1121 }
1122
1123 /* create a collection (array) of barres_t object given a ordered linked list
1124    of barlamda_t sample collections */
1125 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1126                                     gmx_bool use_dhdl)
1127 {
1128     lambda_data_t *bl;
1129     int            nlambda = 0;
1130     barres_t      *res;
1131     gmx_bool       dhdl    = FALSE;
1132     gmx_bool       first   = TRUE;
1133     lambda_data_t *bl_head = sd->lb;
1134
1135     /* first count the lambdas */
1136     bl = bl_head->next;
1137     while (bl != bl_head)
1138     {
1139         nlambda++;
1140         bl = bl->next;
1141     }
1142     snew(res, nlambda-1);
1143
1144     /* next put the right samples in the res */
1145     *nres = 0;
1146     bl    = bl_head->next->next; /* we start with the second one. */
1147     while (bl != bl_head)
1148     {
1149         sample_coll_t *sc, *scprev;
1150         barres_t      *br = &(res[*nres]);
1151         /* there is always a previous one. we search for that as a foreign
1152            lambda: */
1153         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1154         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1155
1156         barres_init(br);
1157
1158         if (use_dhdl)
1159         {
1160             /* we use dhdl */
1161
1162             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1163             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1164
1165             if (first)
1166             {
1167                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1168                 dhdl = TRUE;
1169             }
1170             if (!dhdl)
1171             {
1172                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1173             }
1174         }
1175         else if (!scprev && !sc)
1176         {
1177             char descX[STRLEN], descY[STRLEN];
1178             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1179             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1180
1181             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1182         }
1183
1184         /* normal delta H */
1185         if (!scprev)
1186         {
1187             char descX[STRLEN], descY[STRLEN];
1188             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1189             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1190             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1191         }
1192         if (!sc)
1193         {
1194             char descX[STRLEN], descY[STRLEN];
1195             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1196             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1197             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1198         }
1199         br->a = scprev;
1200         br->b = sc;
1201
1202         first = FALSE;
1203         (*nres)++;
1204         bl = bl->next;
1205     }
1206     return res;
1207 }
1208
1209 /* estimate the maximum discretization error */
1210 static double barres_list_max_disc_err(barres_t *res, int nres)
1211 {
1212     int    i, j;
1213     double disc_err = 0.;
1214     double delta_lambda;
1215
1216     for (i = 0; i < nres; i++)
1217     {
1218         barres_t *br = &(res[i]);
1219
1220         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1221                                            br->a->native_lambda);
1222
1223         for (j = 0; j < br->a->nsamples; j++)
1224         {
1225             if (br->a->s[j]->hist)
1226             {
1227                 double Wfac = 1.;
1228                 if (br->a->s[j]->derivative)
1229                 {
1230                     Wfac =  delta_lambda;
1231                 }
1232
1233                 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1234             }
1235         }
1236         for (j = 0; j < br->b->nsamples; j++)
1237         {
1238             if (br->b->s[j]->hist)
1239             {
1240                 double Wfac = 1.;
1241                 if (br->b->s[j]->derivative)
1242                 {
1243                     Wfac =  delta_lambda;
1244                 }
1245                 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1246             }
1247         }
1248     }
1249     return disc_err;
1250 }
1251
1252
1253 /* impose start and end times on a sample collection, updating sample_ranges */
1254 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1255                                      double end_t)
1256 {
1257     int i;
1258     for (i = 0; i < sc->nsamples; i++)
1259     {
1260         samples_t      *s = sc->s[i];
1261         sample_range_t *r = &(sc->r[i]);
1262         if (s->hist)
1263         {
1264             double end_time = s->hist->delta_time*s->hist->sum +
1265                 s->hist->start_time;
1266             if (s->hist->start_time < begin_t || end_time > end_t)
1267             {
1268                 r->use = FALSE;
1269             }
1270         }
1271         else
1272         {
1273             if (!s->t)
1274             {
1275                 double end_time;
1276                 if (s->start_time < begin_t)
1277                 {
1278                     r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1279                 }
1280                 end_time = s->delta_time*s->ndu + s->start_time;
1281                 if (end_time > end_t)
1282                 {
1283                     r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1284                 }
1285             }
1286             else
1287             {
1288                 int j;
1289                 for (j = 0; j < s->ndu; j++)
1290                 {
1291                     if (s->t[j] < begin_t)
1292                     {
1293                         r->start = j;
1294                     }
1295
1296                     if (s->t[j] >= end_t)
1297                     {
1298                         r->end = j;
1299                         break;
1300                     }
1301                 }
1302             }
1303             if (r->start > r->end)
1304             {
1305                 r->use = FALSE;
1306             }
1307         }
1308     }
1309     sample_coll_calc_ntot(sc);
1310 }
1311
1312 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1313 {
1314     double         first_t, last_t;
1315     double         begin_t, end_t;
1316     lambda_data_t *lc;
1317     lambda_data_t *head = sd->lb;
1318     int            j;
1319
1320     if (begin <= 0 && end < 0)
1321     {
1322         return;
1323     }
1324
1325     /* first determine the global start and end times */
1326     first_t = -1;
1327     last_t  = -1;
1328     lc      = head->next;
1329     while (lc != head)
1330     {
1331         sample_coll_t *sc = lc->sc->next;
1332         while (sc != lc->sc)
1333         {
1334             for (j = 0; j < sc->nsamples; j++)
1335             {
1336                 double start_t, end_t;
1337
1338                 start_t = sc->s[j]->start_time;
1339                 end_t   =   sc->s[j]->start_time;
1340                 if (sc->s[j]->hist)
1341                 {
1342                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1343                 }
1344                 else
1345                 {
1346                     if (sc->s[j]->t)
1347                     {
1348                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1349                     }
1350                     else
1351                     {
1352                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1353                     }
1354                 }
1355
1356                 if (start_t < first_t || first_t < 0)
1357                 {
1358                     first_t = start_t;
1359                 }
1360                 if (end_t > last_t)
1361                 {
1362                     last_t = end_t;
1363                 }
1364             }
1365             sc = sc->next;
1366         }
1367         lc = lc->next;
1368     }
1369
1370     /* calculate the actual times */
1371     if (begin > 0)
1372     {
1373         begin_t = begin;
1374     }
1375     else
1376     {
1377         begin_t = first_t;
1378     }
1379
1380     if (end > 0)
1381     {
1382         end_t = end;
1383     }
1384     else
1385     {
1386         end_t = last_t;
1387     }
1388     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1389
1390     if (begin_t > end_t)
1391     {
1392         return;
1393     }
1394     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1395
1396     /* then impose them */
1397     lc = head->next;
1398     while (lc != head)
1399     {
1400         sample_coll_t *sc = lc->sc->next;
1401         while (sc != lc->sc)
1402         {
1403             sample_coll_impose_times(sc, begin_t, end_t);
1404             sc = sc->next;
1405         }
1406         lc = lc->next;
1407     }
1408 }
1409
1410
1411 /* create subsample i out of ni from an existing sample_coll */
1412 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1413                                              sample_coll_t *sc_orig,
1414                                              int i, int ni)
1415 {
1416     int             j;
1417
1418     int64_t         ntot_start;
1419     int64_t         ntot_end;
1420     int64_t         ntot_so_far;
1421
1422     *sc = *sc_orig; /* just copy all fields */
1423
1424     /* allocate proprietary memory */
1425     snew(sc->s, sc_orig->nsamples);
1426     snew(sc->r, sc_orig->nsamples);
1427
1428     /* copy the samples */
1429     for (j = 0; j < sc_orig->nsamples; j++)
1430     {
1431         sc->s[j] = sc_orig->s[j];
1432         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1433     }
1434
1435     /* now fix start and end fields */
1436     /* the casts avoid possible overflows */
1437     ntot_start  = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1438     ntot_end    = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1439     ntot_so_far = 0;
1440     for (j = 0; j < sc->nsamples; j++)
1441     {
1442         int64_t ntot_add;
1443         int64_t new_start, new_end;
1444
1445         if (sc->r[j].use)
1446         {
1447             if (sc->s[j]->hist)
1448             {
1449                 ntot_add = sc->s[j]->hist->sum;
1450             }
1451             else
1452             {
1453                 ntot_add = sc->r[j].end - sc->r[j].start;
1454             }
1455         }
1456         else
1457         {
1458             ntot_add = 0;
1459         }
1460
1461         if (!sc->s[j]->hist)
1462         {
1463             if (ntot_so_far < ntot_start)
1464             {
1465                 /* adjust starting point */
1466                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1467             }
1468             else
1469             {
1470                 new_start = sc->r[j].start;
1471             }
1472             /* adjust end point */
1473             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1474             if (new_end > sc->r[j].end)
1475             {
1476                 new_end = sc->r[j].end;
1477             }
1478
1479             /* check if we're in range at all */
1480             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1481             {
1482                 new_start = 0;
1483                 new_end   = 0;
1484             }
1485             /* and write the new range */
1486             GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1487             GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1488             sc->r[j].start = static_cast<int>(new_start);
1489             sc->r[j].end   = static_cast<int>(new_end);
1490         }
1491         else
1492         {
1493             if (sc->r[j].use)
1494             {
1495                 double overlap;
1496                 double ntot_start_norm, ntot_end_norm;
1497                 /* calculate the amount of overlap of the
1498                    desired range (ntot_start -- ntot_end) onto
1499                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1500
1501                 /* first calculate normalized bounds
1502                    (where 0 is the start of the hist range, and 1 the end) */
1503                 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1504                 ntot_end_norm   = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1505
1506                 /* now fix the boundaries */
1507                 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1508                 ntot_end_norm   = std::max(0.0, std::min(1.0, ntot_end_norm));
1509
1510                 /* and calculate the overlap */
1511                 overlap = ntot_end_norm - ntot_start_norm;
1512
1513                 if (overlap > 0.95) /* we allow for 5% slack */
1514                 {
1515                     sc->r[j].use = TRUE;
1516                 }
1517                 else if (overlap < 0.05)
1518                 {
1519                     sc->r[j].use = FALSE;
1520                 }
1521                 else
1522                 {
1523                     return FALSE;
1524                 }
1525             }
1526         }
1527         ntot_so_far += ntot_add;
1528     }
1529     sample_coll_calc_ntot(sc);
1530
1531     return TRUE;
1532 }
1533
1534 /* calculate minimum and maximum work values in sample collection */
1535 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1536                                 double *Wmin, double *Wmax)
1537 {
1538     int i, j;
1539
1540     *Wmin =  std::numeric_limits<float>::max();
1541     *Wmax = -std::numeric_limits<float>::max();
1542
1543     for (i = 0; i < sc->nsamples; i++)
1544     {
1545         samples_t      *s = sc->s[i];
1546         sample_range_t *r = &(sc->r[i]);
1547         if (r->use)
1548         {
1549             if (!s->hist)
1550             {
1551                 for (j = r->start; j < r->end; j++)
1552                 {
1553                     *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1554                     *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1555                 }
1556             }
1557             else
1558             {
1559                 int    hd = 0; /* determine the histogram direction: */
1560                 double dx;
1561                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1562                 {
1563                     hd = 1;
1564                 }
1565                 dx = s->hist->dx[hd];
1566
1567                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1568                 {
1569                     *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1570                     *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1571                     /* look for the highest value bin with values */
1572                     if (s->hist->bin[hd][j] > 0)
1573                     {
1574                         *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1575                         *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1576                         break;
1577                     }
1578                 }
1579             }
1580         }
1581     }
1582 }
1583
1584 /* Initialize a sim_data structure */
1585 static void sim_data_init(sim_data_t *sd)
1586 {
1587     /* make linked list */
1588     sd->lb       = &(sd->lb_head);
1589     sd->lb->next = sd->lb;
1590     sd->lb->prev = sd->lb;
1591
1592     lambda_components_init(&(sd->lc));
1593 }
1594
1595
1596 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1597 {
1598     int    i;
1599     double sum;
1600
1601     sum = 0;
1602
1603     for (i = 0; i < n; i++)
1604     {
1605         sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1606     }
1607
1608     return sum;
1609 }
1610
1611 /* calculate the BAR average given a histogram
1612
1613     if type== 0, calculate the best estimate for the average,
1614     if type==-1, calculate the minimum possible value given the histogram
1615     if type== 1, calculate the maximum possible value given the histogram */
1616 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1617                                 int type)
1618 {
1619     double sum = 0.;
1620     int    i;
1621     int    maxbin;
1622     /* normalization factor multiplied with bin width and
1623        number of samples (we normalize through M): */
1624     double normdx = 1.;
1625     int    hd     = 0; /* determine the histogram direction: */
1626     double dx;
1627
1628     if ( (hist->nhist > 1) && (Wfac < 0) )
1629     {
1630         hd = 1;
1631     }
1632     dx     = hist->dx[hd];
1633     maxbin = hist->nbin[hd]-1;
1634     if (type == 1)
1635     {
1636         maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1637     }
1638
1639     for (i = 0; i < maxbin; i++)
1640     {
1641         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1642         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1643
1644         sum += pxdx/(1. + std::exp(x + sbMmDG));
1645     }
1646
1647     return sum;
1648 }
1649
1650 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1651                                 double temp, double tol, int type)
1652 {
1653     double kT, beta, M;
1654     int    i;
1655     double Wfac1, Wfac2, Wmin, Wmax;
1656     double DG0, DG1, DG2, dDG1;
1657     double n1, n2; /* numbers of samples as doubles */
1658
1659     kT   = BOLTZ*temp;
1660     beta = 1/kT;
1661
1662     /* count the numbers of samples */
1663     n1 = ca->ntot;
1664     n2 = cb->ntot;
1665
1666     M = std::log(n1/n2);
1667
1668     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1669     if (ca->foreign_lambda->dhdl < 0)
1670     {
1671         /* this is the case when the delta U were calculated directly
1672            (i.e. we're not scaling dhdl) */
1673         Wfac1 = beta;
1674         Wfac2 = beta;
1675     }
1676     else
1677     {
1678         /* we're using dhdl, so delta_lambda needs to be a
1679            multiplication factor.  */
1680         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1681         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1682                                                   ca->native_lambda);
1683         if (cb->native_lambda->lc->N > 1)
1684         {
1685             gmx_fatal(FARGS,
1686                       "Can't (yet) do multi-component dhdl interpolation");
1687         }
1688
1689         Wfac1 =  beta*delta_lambda;
1690         Wfac2 = -beta*delta_lambda;
1691     }
1692
1693     if (beta < 1)
1694     {
1695         /* We print the output both in kT and kJ/mol.
1696          * Here we determine DG in kT, so when beta < 1
1697          * the precision has to be increased.
1698          */
1699         tol *= beta;
1700     }
1701
1702     /* Calculate minimum and maximum work to give an initial estimate of
1703      * delta G  as their average.
1704      */
1705     {
1706         double Wmin1, Wmin2, Wmax1, Wmax2;
1707         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1708         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1709
1710         Wmin = std::min(Wmin1, Wmin2);
1711         Wmax = std::max(Wmax1, Wmax2);
1712     }
1713
1714     DG0 = Wmin;
1715     DG2 = Wmax;
1716
1717     if (debug)
1718     {
1719         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1720     }
1721     /* We approximate by bisection: given our initial estimates
1722        we keep checking whether the halfway point is greater or
1723        smaller than what we get out of the BAR averages.
1724
1725        For the comparison we can use twice the tolerance. */
1726     while (DG2 - DG0 > 2*tol)
1727     {
1728         DG1 = 0.5*(DG0 + DG2);
1729
1730         /* calculate the BAR averages */
1731         dDG1 = 0.;
1732
1733         for (i = 0; i < ca->nsamples; i++)
1734         {
1735             samples_t      *s = ca->s[i];
1736             sample_range_t *r = &(ca->r[i]);
1737             if (r->use)
1738             {
1739                 if (s->hist)
1740                 {
1741                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1742                 }
1743                 else
1744                 {
1745                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1746                                          Wfac1, (M-DG1));
1747                 }
1748             }
1749         }
1750         for (i = 0; i < cb->nsamples; i++)
1751         {
1752             samples_t      *s = cb->s[i];
1753             sample_range_t *r = &(cb->r[i]);
1754             if (r->use)
1755             {
1756                 if (s->hist)
1757                 {
1758                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1759                 }
1760                 else
1761                 {
1762                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1763                                          Wfac2, -(M-DG1));
1764                 }
1765             }
1766         }
1767
1768         if (dDG1 < 0)
1769         {
1770             DG0 = DG1;
1771         }
1772         else
1773         {
1774             DG2 = DG1;
1775         }
1776         if (debug)
1777         {
1778             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1779         }
1780     }
1781
1782     return 0.5*(DG0 + DG2);
1783 }
1784
1785 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1786                              double temp, double dg, double *sa, double *sb)
1787 {
1788     int    i, j;
1789     double W_ab = 0.;
1790     double W_ba = 0.;
1791     double kT, beta;
1792     double Wfac1, Wfac2;
1793     double n1, n2;
1794
1795     kT   = BOLTZ*temp;
1796     beta = 1/kT;
1797
1798     /* count the numbers of samples */
1799     n1 = ca->ntot;
1800     n2 = cb->ntot;
1801
1802     /* to ensure the work values are the same as during the delta_G */
1803     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1804     if (ca->foreign_lambda->dhdl < 0)
1805     {
1806         /* this is the case when the delta U were calculated directly
1807            (i.e. we're not scaling dhdl) */
1808         Wfac1 = beta;
1809         Wfac2 = beta;
1810     }
1811     else
1812     {
1813         /* we're using dhdl, so delta_lambda needs to be a
1814            multiplication factor.  */
1815         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1816                                                   ca->native_lambda);
1817         Wfac1 =  beta*delta_lambda;
1818         Wfac2 = -beta*delta_lambda;
1819     }
1820
1821     /* first calculate the average work in both directions */
1822     for (i = 0; i < ca->nsamples; i++)
1823     {
1824         samples_t      *s = ca->s[i];
1825         sample_range_t *r = &(ca->r[i]);
1826         if (r->use)
1827         {
1828             if (!s->hist)
1829             {
1830                 for (j = r->start; j < r->end; j++)
1831                 {
1832                     W_ab += Wfac1*s->du[j];
1833                 }
1834             }
1835             else
1836             {
1837                 /* normalization factor multiplied with bin width and
1838                    number of samples (we normalize through M): */
1839                 double normdx = 1.;
1840                 double dx;
1841                 int    hd = 0; /* histogram direction */
1842                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1843                 {
1844                     hd = 1;
1845                 }
1846                 dx = s->hist->dx[hd];
1847
1848                 for (j = 0; j < s->hist->nbin[0]; j++)
1849                 {
1850                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1851                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1852                     W_ab += pxdx*x;
1853                 }
1854             }
1855         }
1856     }
1857     W_ab /= n1;
1858
1859     for (i = 0; i < cb->nsamples; i++)
1860     {
1861         samples_t      *s = cb->s[i];
1862         sample_range_t *r = &(cb->r[i]);
1863         if (r->use)
1864         {
1865             if (!s->hist)
1866             {
1867                 for (j = r->start; j < r->end; j++)
1868                 {
1869                     W_ba += Wfac1*s->du[j];
1870                 }
1871             }
1872             else
1873             {
1874                 /* normalization factor multiplied with bin width and
1875                    number of samples (we normalize through M): */
1876                 double normdx = 1.;
1877                 double dx;
1878                 int    hd = 0; /* histogram direction */
1879                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1880                 {
1881                     hd = 1;
1882                 }
1883                 dx = s->hist->dx[hd];
1884
1885                 for (j = 0; j < s->hist->nbin[0]; j++)
1886                 {
1887                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1888                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1889                     W_ba += pxdx*x;
1890                 }
1891             }
1892         }
1893     }
1894     W_ba /= n2;
1895
1896     /* then calculate the relative entropies */
1897     *sa = (W_ab - dg);
1898     *sb = (W_ba + dg);
1899 }
1900
1901 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1902                            double temp, double dg, double *stddev)
1903 {
1904     int    i, j;
1905     double M;
1906     double sigmafact = 0.;
1907     double kT, beta;
1908     double Wfac1, Wfac2;
1909     double n1, n2;
1910
1911     kT   = BOLTZ*temp;
1912     beta = 1/kT;
1913
1914     /* count the numbers of samples */
1915     n1 = ca->ntot;
1916     n2 = cb->ntot;
1917
1918     /* to ensure the work values are the same as during the delta_G */
1919     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1920     if (ca->foreign_lambda->dhdl < 0)
1921     {
1922         /* this is the case when the delta U were calculated directly
1923            (i.e. we're not scaling dhdl) */
1924         Wfac1 = beta;
1925         Wfac2 = beta;
1926     }
1927     else
1928     {
1929         /* we're using dhdl, so delta_lambda needs to be a
1930            multiplication factor.  */
1931         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1932                                                   ca->native_lambda);
1933         Wfac1 =  beta*delta_lambda;
1934         Wfac2 = -beta*delta_lambda;
1935     }
1936
1937     M = std::log(n1/n2);
1938
1939
1940     /* calculate average in both directions */
1941     for (i = 0; i < ca->nsamples; i++)
1942     {
1943         samples_t      *s = ca->s[i];
1944         sample_range_t *r = &(ca->r[i]);
1945         if (r->use)
1946         {
1947             if (!s->hist)
1948             {
1949                 for (j = r->start; j < r->end; j++)
1950                 {
1951                     sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1952                 }
1953             }
1954             else
1955             {
1956                 /* normalization factor multiplied with bin width and
1957                    number of samples (we normalize through M): */
1958                 double normdx = 1.;
1959                 double dx;
1960                 int    hd = 0; /* histogram direction */
1961                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1962                 {
1963                     hd = 1;
1964                 }
1965                 dx = s->hist->dx[hd];
1966
1967                 for (j = 0; j < s->hist->nbin[0]; j++)
1968                 {
1969                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1970                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1971
1972                     sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1973                 }
1974             }
1975         }
1976     }
1977     for (i = 0; i < cb->nsamples; i++)
1978     {
1979         samples_t      *s = cb->s[i];
1980         sample_range_t *r = &(cb->r[i]);
1981         if (r->use)
1982         {
1983             if (!s->hist)
1984             {
1985                 for (j = r->start; j < r->end; j++)
1986                 {
1987                     sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1988                 }
1989             }
1990             else
1991             {
1992                 /* normalization factor multiplied with bin width and
1993                    number of samples (we normalize through M): */
1994                 double normdx = 1.;
1995                 double dx;
1996                 int    hd = 0; /* histogram direction */
1997                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1998                 {
1999                     hd = 1;
2000                 }
2001                 dx = s->hist->dx[hd];
2002
2003                 for (j = 0; j < s->hist->nbin[0]; j++)
2004                 {
2005                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2006                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
2007
2008                     sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2009                 }
2010             }
2011         }
2012     }
2013
2014     sigmafact /= (n1 + n2);
2015
2016
2017     /* Eq. 10 from
2018        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2019     *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2020 }
2021
2022
2023
2024 static void calc_bar(barres_t *br, double tol,
2025                      int npee_min, int npee_max, gmx_bool *bEE,
2026                      double *partsum)
2027 {
2028     int      npee, p;
2029     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2030                                                         for calculated quantities */
2031     double   temp = br->a->temp;
2032     int      i;
2033     double   dg_min, dg_max;
2034     gmx_bool have_hist = FALSE;
2035
2036     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2037
2038     br->dg_disc_err      = 0.;
2039     br->dg_histrange_err = 0.;
2040
2041     /* check if there are histograms */
2042     for (i = 0; i < br->a->nsamples; i++)
2043     {
2044         if (br->a->r[i].use && br->a->s[i]->hist)
2045         {
2046             have_hist = TRUE;
2047             break;
2048         }
2049     }
2050     if (!have_hist)
2051     {
2052         for (i = 0; i < br->b->nsamples; i++)
2053         {
2054             if (br->b->r[i].use && br->b->s[i]->hist)
2055             {
2056                 have_hist = TRUE;
2057                 break;
2058             }
2059         }
2060     }
2061
2062     /* calculate histogram-specific errors */
2063     if (have_hist)
2064     {
2065         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2066         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2067
2068         if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2069         {
2070             /* the histogram range  error is the biggest of the differences
2071                between the best estimate and the extremes */
2072             br->dg_histrange_err = std::abs(dg_max - dg_min);
2073         }
2074         br->dg_disc_err = 0.;
2075         for (i = 0; i < br->a->nsamples; i++)
2076         {
2077             if (br->a->s[i]->hist)
2078             {
2079                 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2080             }
2081         }
2082         for (i = 0; i < br->b->nsamples; i++)
2083         {
2084             if (br->b->s[i]->hist)
2085             {
2086                 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2087             }
2088         }
2089     }
2090     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2091
2092     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2093
2094     dg_sig2     = 0;
2095     sa_sig2     = 0;
2096     sb_sig2     = 0;
2097     stddev_sig2 = 0;
2098
2099     *bEE = TRUE;
2100     {
2101         sample_coll_t ca, cb;
2102
2103         /* initialize the samples */
2104         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2105                          br->a->temp);
2106         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2107                          br->b->temp);
2108
2109         for (npee = npee_min; npee <= npee_max; npee++)
2110         {
2111             double dgs      = 0;
2112             double dgs2     = 0;
2113             double dsa      = 0;
2114             double dsb      = 0;
2115             double dsa2     = 0;
2116             double dsb2     = 0;
2117             double dstddev  = 0;
2118             double dstddev2 = 0;
2119
2120
2121             for (p = 0; p < npee; p++)
2122             {
2123                 double   dgp;
2124                 double   stddevc;
2125                 double   sac, sbc;
2126                 gmx_bool cac, cbc;
2127
2128                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2129                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2130
2131                 if (!cac || !cbc)
2132                 {
2133                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2134                     *bEE = FALSE;
2135                     if (cac)
2136                     {
2137                         sample_coll_destroy(&ca);
2138                     }
2139                     if (cbc)
2140                     {
2141                         sample_coll_destroy(&cb);
2142                     }
2143                     return;
2144                 }
2145
2146                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2147                 dgs  += dgp;
2148                 dgs2 += dgp*dgp;
2149
2150                 partsum[npee*(npee_max+1)+p] += dgp;
2151
2152                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2153                 dsa  += sac;
2154                 dsa2 += sac*sac;
2155                 dsb  += sbc;
2156                 dsb2 += sbc*sbc;
2157                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2158
2159                 dstddev  += stddevc;
2160                 dstddev2 += stddevc*stddevc;
2161
2162                 sample_coll_destroy(&ca);
2163                 sample_coll_destroy(&cb);
2164             }
2165             dgs     /= npee;
2166             dgs2    /= npee;
2167             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2168
2169             dsa     /= npee;
2170             dsa2    /= npee;
2171             dsb     /= npee;
2172             dsb2    /= npee;
2173             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2174             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2175
2176             dstddev     /= npee;
2177             dstddev2    /= npee;
2178             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2179         }
2180         br->dg_err        = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2181         br->sa_err        = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2182         br->sb_err        = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2183         br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2184     }
2185 }
2186
2187
2188 static double bar_err(int nbmin, int nbmax, const double *partsum)
2189 {
2190     int    nb, b;
2191     double svar, s, s2, dg;
2192
2193     svar = 0;
2194     for (nb = nbmin; nb <= nbmax; nb++)
2195     {
2196         s  = 0;
2197         s2 = 0;
2198         for (b = 0; b < nb; b++)
2199         {
2200             dg  = partsum[nb*(nbmax+1)+b];
2201             s  += dg;
2202             s2 += dg*dg;
2203         }
2204         s    /= nb;
2205         s2   /= nb;
2206         svar += (s2 - s*s)/(nb - 1);
2207     }
2208
2209     return std::sqrt(svar/(nbmax + 1 - nbmin));
2210 }
2211
2212
2213 /* Seek the end of an identifier (consecutive non-spaces), followed by
2214    an optional number of spaces or '='-signs. Returns a pointer to the
2215    first non-space value found after that. Returns NULL if the string
2216    ends before that.
2217  */
2218 static const char *find_value(const char *str)
2219 {
2220     gmx_bool name_end_found = FALSE;
2221
2222     /* if the string is a NULL pointer, return a NULL pointer. */
2223     if (str == nullptr)
2224     {
2225         return nullptr;
2226     }
2227     while (*str != '\0')
2228     {
2229         /* first find the end of the name */
2230         if (!name_end_found)
2231         {
2232             if (std::isspace(*str) || (*str == '=') )
2233             {
2234                 name_end_found = TRUE;
2235             }
2236         }
2237         else
2238         {
2239             if (!( std::isspace(*str) || (*str == '=') ))
2240             {
2241                 return str;
2242             }
2243         }
2244         str++;
2245     }
2246     return nullptr;
2247 }
2248
2249
2250 /* read a vector-notation description of a lambda vector */
2251 static gmx_bool read_lambda_compvec(const char                *str,
2252                                     lambda_vec_t              *lv,
2253                                     const lambda_components_t *lc_in,
2254                                     lambda_components_t       *lc_out,
2255                                     const char               **end,
2256                                     const char                *fn)
2257 {
2258     gmx_bool    initialize_lc = FALSE;   /* whether to initialize the lambda
2259                                             components, or to check them */
2260     gmx_bool    start_reached = FALSE;   /* whether the start of component names
2261                                             has been reached */
2262     gmx_bool    vector        = FALSE;   /* whether there are multiple components */
2263     int         n             = 0;       /* current component number */
2264     const char *val_start     = nullptr; /* start of the component name, or NULL
2265                                             if not in a value */
2266     char       *strtod_end;
2267     gmx_bool    OK = TRUE;
2268
2269     if (end)
2270     {
2271         *end = str;
2272     }
2273
2274
2275     if (lc_out && lc_out->N == 0)
2276     {
2277         initialize_lc = TRUE;
2278     }
2279
2280     if (lc_in == nullptr)
2281     {
2282         lc_in = lc_out;
2283     }
2284
2285     while (true)
2286     {
2287         if (!start_reached)
2288         {
2289             if (std::isalnum(*str))
2290             {
2291                 vector        = FALSE;
2292                 start_reached = TRUE;
2293                 val_start     = str;
2294             }
2295             else if (*str == '(')
2296             {
2297                 vector        = TRUE;
2298                 start_reached = TRUE;
2299             }
2300             else if (!std::isspace(*str))
2301             {
2302                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2303             }
2304         }
2305         else
2306         {
2307             if (val_start)
2308             {
2309                 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2310                 {
2311                     /* end of value */
2312                     if (lv == nullptr)
2313                     {
2314                         if (initialize_lc)
2315                         {
2316                             lambda_components_add(lc_out, val_start,
2317                                                   (str-val_start));
2318                         }
2319                         else
2320                         {
2321                             if (!lambda_components_check(lc_out, n, val_start,
2322                                                          (str-val_start)))
2323                             {
2324                                 return FALSE;
2325                             }
2326                         }
2327                     }
2328                     else
2329                     {
2330                         /* add a vector component to lv */
2331                         lv->val[n] = strtod(val_start, &strtod_end);
2332                         if (val_start == strtod_end)
2333                         {
2334                             gmx_fatal(FARGS,
2335                                       "Error reading lambda vector in %s", fn);
2336                         }
2337                     }
2338                     /* reset for the next identifier */
2339                     val_start = nullptr;
2340                     n++;
2341                     if (!vector)
2342                     {
2343                         return OK;
2344                     }
2345                 }
2346             }
2347             else if (std::isalnum(*str))
2348             {
2349                 val_start = str;
2350             }
2351             if (*str == ')')
2352             {
2353                 str++;
2354                 if (end)
2355                 {
2356                     *end = str;
2357                 }
2358                 if (!vector)
2359                 {
2360                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2361                 }
2362                 else
2363                 {
2364                     GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2365                     if (n == lc_in->N)
2366                     {
2367                         return OK;
2368                     }
2369                     else if (lv == nullptr)
2370                     {
2371                         return FALSE;
2372                     }
2373                     else
2374                     {
2375                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2376                                   fn);
2377                         return FALSE;
2378                     }
2379
2380                 }
2381             }
2382         }
2383         if (*str == '\0')
2384         {
2385             break;
2386         }
2387         str++;
2388         if (end)
2389         {
2390             *end = str;
2391         }
2392     }
2393     if (vector)
2394     {
2395         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2396         return FALSE;
2397     }
2398     return OK;
2399 }
2400
2401 /* read and check the component names from a string */
2402 static gmx_bool read_lambda_components(const char          *str,
2403                                        lambda_components_t *lc,
2404                                        const char         **end,
2405                                        const char          *fn)
2406 {
2407     return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2408 }
2409
2410 /* read an initialized lambda vector from a string */
2411 static gmx_bool read_lambda_vector(const char   *str,
2412                                    lambda_vec_t *lv,
2413                                    const char  **end,
2414                                    const char   *fn)
2415 {
2416     return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2417 }
2418
2419
2420
2421 /* deduce lambda value from legend.
2422     fn = the file name
2423     legend = the legend string
2424     ba = the xvg data
2425     lam = the initialized lambda vector
2426    returns whether to use the data in this set.
2427  */
2428 static gmx_bool legend2lambda(const char   *fn,
2429                               const char   *legend,
2430                               lambda_vec_t *lam)
2431 {
2432     const char   *ptr    = nullptr, *ptr2 = nullptr;
2433     gmx_bool      ok     = FALSE;
2434     gmx_bool      bdhdl  = FALSE;
2435     const char   *tostr  = " to ";
2436
2437     if (legend == nullptr)
2438     {
2439         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2440     }
2441
2442     /* look for the last 'to': */
2443     ptr2 = legend;
2444     do
2445     {
2446         ptr2 = std::strstr(ptr2, tostr);
2447         if (ptr2 != nullptr)
2448         {
2449             ptr = ptr2;
2450             ptr2++;
2451         }
2452     }
2453     while (ptr2 != nullptr && *ptr2 != '\0');
2454
2455     if (ptr)
2456     {
2457         ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2458     }
2459     else
2460     {
2461         /* look for the = sign */
2462         ptr = std::strrchr(legend, '=');
2463         if (!ptr)
2464         {
2465             /* otherwise look for the last space */
2466             ptr = std::strrchr(legend, ' ');
2467         }
2468     }
2469
2470     if (std::strstr(legend, "dH"))
2471     {
2472         ok    = TRUE;
2473         bdhdl = TRUE;
2474     }
2475     else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2476     {
2477         ok    = TRUE;
2478         bdhdl = FALSE;
2479     }
2480     else /*if (std::strstr(legend, "pV"))*/
2481     {
2482         return FALSE;
2483     }
2484     if (!ptr)
2485     {
2486         ok = FALSE;
2487     }
2488
2489     if (!ok)
2490     {
2491         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2492     }
2493     if (!bdhdl)
2494     {
2495         ptr = find_value(ptr);
2496         if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2497         {
2498             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2499         }
2500     }
2501     else
2502     {
2503         int         dhdl_index;
2504         const char *end;
2505
2506         ptr = std::strrchr(legend, '=');
2507         end = ptr;
2508         if (ptr)
2509         {
2510             /* there must be a component name */
2511             ptr--;
2512             if (ptr < legend)
2513             {
2514                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2515             }
2516             /* now backtrack to the start of the identifier */
2517             while (isspace(*ptr))
2518             {
2519                 end = ptr;
2520                 ptr--;
2521                 if (ptr < legend)
2522                 {
2523                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2524                 }
2525             }
2526             while (!std::isspace(*ptr))
2527             {
2528                 ptr--;
2529                 if (ptr < legend)
2530                 {
2531                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2532                 }
2533             }
2534             ptr++;
2535             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2536             if (dhdl_index < 0)
2537             {
2538                 char buf[STRLEN];
2539                 std::strncpy(buf, ptr, (end-ptr));
2540                 buf[(end-ptr)] = '\0';
2541                 gmx_fatal(FARGS,
2542                           "Did not find lambda component for '%s' in %s",
2543                           buf, fn);
2544             }
2545         }
2546         else
2547         {
2548             if (lam->lc->N > 1)
2549             {
2550                 gmx_fatal(FARGS,
2551                           "dhdl without component name with >1 lambda component in %s",
2552                           fn);
2553             }
2554             dhdl_index = 0;
2555         }
2556         lam->dhdl = dhdl_index;
2557     }
2558     return TRUE;
2559 }
2560
2561 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2562                                 lambda_components_t *lc)
2563 {
2564     gmx_bool    bFound;
2565     const char *ptr;
2566     char       *end;
2567     double      native_lambda;
2568
2569     bFound = FALSE;
2570
2571     /* first check for a state string */
2572     ptr = std::strstr(subtitle, "state");
2573     if (ptr)
2574     {
2575         int         index = -1;
2576         const char *val_end;
2577
2578         /* the new 4.6 style lambda vectors */
2579         ptr = find_value(ptr);
2580         if (ptr)
2581         {
2582             index = std::strtol(ptr, &end, 10);
2583             if (ptr == end)
2584             {
2585                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2586                 return FALSE;
2587             }
2588             ptr = end;
2589         }
2590         else
2591         {
2592             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2593             return FALSE;
2594         }
2595         /* now find the lambda vector component names */
2596         while (*ptr != '(' && !std::isalnum(*ptr))
2597         {
2598             ptr++;
2599             if (*ptr == '\0')
2600             {
2601                 gmx_fatal(FARGS,
2602                           "Incomplete lambda vector component data in %s", fn);
2603                 return FALSE;
2604             }
2605         }
2606         val_end = ptr;
2607         if (!read_lambda_components(ptr, lc, &val_end, fn))
2608         {
2609             gmx_fatal(FARGS,
2610                       "lambda vector components in %s don't match those previously read",
2611                       fn);
2612         }
2613         ptr = find_value(val_end);
2614         if (!ptr)
2615         {
2616             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2617             return FALSE;
2618         }
2619         lambda_vec_init(&(ba->native_lambda), lc);
2620         if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2621         {
2622             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2623         }
2624         ba->native_lambda.index = index;
2625         bFound                  = TRUE;
2626     }
2627     else
2628     {
2629         /* compatibility mode: check for lambda in other ways. */
2630         /* plain text lambda string */
2631         ptr = std::strstr(subtitle, "lambda");
2632         if (ptr == nullptr)
2633         {
2634             /* xmgrace formatted lambda string */
2635             ptr = std::strstr(subtitle, "\\xl\\f{}");
2636         }
2637         if (ptr == nullptr)
2638         {
2639             /* xmgr formatted lambda string */
2640             ptr = std::strstr(subtitle, "\\8l\\4");
2641         }
2642         if (ptr != nullptr)
2643         {
2644             ptr = std::strstr(ptr, "=");
2645         }
2646         if (ptr != nullptr)
2647         {
2648             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2649             /* add the lambda component name as an empty string */
2650             if (lc->N > 0)
2651             {
2652                 if (!lambda_components_check(lc, 0, "", 0))
2653                 {
2654                     gmx_fatal(FARGS,
2655                               "lambda vector components in %s don't match those previously read",
2656                               fn);
2657                 }
2658             }
2659             else
2660             {
2661                 lambda_components_add(lc, "", 0);
2662             }
2663             lambda_vec_init(&(ba->native_lambda), lc);
2664             ba->native_lambda.val[0] = native_lambda;
2665         }
2666     }
2667
2668     return bFound;
2669 }
2670
2671 static void read_bar_xvg_lowlevel(const char *fn, const real *temp, xvg_t *ba,
2672                                   lambda_components_t *lc)
2673 {
2674     int          i;
2675     char        *subtitle, **legend, *ptr;
2676     int          np;
2677     gmx_bool     native_lambda_read = FALSE;
2678     char         buf[STRLEN];
2679
2680     xvg_init(ba);
2681
2682     ba->filename = fn;
2683
2684     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2685     if (!ba->y)
2686     {
2687         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2688     }
2689     /* Reorder the data */
2690     ba->t  = ba->y[0];
2691     for (i = 1; i < ba->nset; i++)
2692     {
2693         ba->y[i-1] = ba->y[i];
2694     }
2695     ba->nset--;
2696
2697     snew(ba->np, ba->nset);
2698     for (i = 0; i < ba->nset; i++)
2699     {
2700         ba->np[i] = np;
2701     }
2702
2703     ba->temp = -1;
2704     if (subtitle != nullptr)
2705     {
2706         /* try to extract temperature */
2707         ptr = std::strstr(subtitle, "T =");
2708         if (ptr != nullptr)
2709         {
2710             ptr += 3;
2711             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2712             {
2713                 if (ba->temp <= 0)
2714                 {
2715                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2716                               ba->temp, fn);
2717                 }
2718             }
2719         }
2720     }
2721     if (ba->temp < 0)
2722     {
2723         if (*temp <= 0)
2724         {
2725             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2726         }
2727         ba->temp = *temp;
2728     }
2729
2730     /* Try to deduce lambda from the subtitle */
2731     if (subtitle)
2732     {
2733         if (subtitle2lambda(subtitle, ba, fn, lc))
2734         {
2735             native_lambda_read = TRUE;
2736         }
2737     }
2738     snew(ba->lambda, ba->nset);
2739     if (legend == nullptr)
2740     {
2741         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2742         if (ba->nset == 1)
2743         {
2744             ba->lambda[0] = ba->native_lambda;
2745         }
2746         else
2747         {
2748             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2749         }
2750     }
2751     else
2752     {
2753         for (i = 0; i < ba->nset; )
2754         {
2755             /* Read lambda from the legend */
2756             lambda_vec_init( &(ba->lambda[i]), lc );
2757             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2758             gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2759             if (use)
2760             {
2761                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2762                 i++;
2763             }
2764             else
2765             {
2766                 int j;
2767                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2768                 for (j = i+1; j < ba->nset; j++)
2769                 {
2770                     ba->y[j-1]  = ba->y[j];
2771                     legend[j-1] = legend[j];
2772                 }
2773                 ba->nset--;
2774             }
2775         }
2776     }
2777
2778     if (!native_lambda_read)
2779     {
2780         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2781     }
2782
2783     if (legend != nullptr)
2784     {
2785         for (i = 0; i < ba->nset-1; i++)
2786         {
2787             sfree(legend[i]);
2788         }
2789         sfree(legend);
2790     }
2791 }
2792
2793 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2794 {
2795     xvg_t     *barsim;
2796     samples_t *s;
2797     int        i;
2798
2799     snew(barsim, 1);
2800
2801     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2802
2803     if (barsim->nset < 1)
2804     {
2805         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2806     }
2807
2808     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2809     {
2810         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2811     }
2812     *temp = barsim->temp;
2813
2814     /* now create a series of samples_t */
2815     snew(s, barsim->nset);
2816     for (i = 0; i < barsim->nset; i++)
2817     {
2818         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2819                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2820                                                    &(barsim->lambda[i])),
2821                      fn);
2822         s[i].du  = barsim->y[i];
2823         s[i].ndu = barsim->np[i];
2824         s[i].t   = barsim->t;
2825
2826         lambda_data_list_insert_sample(sd->lb, s+i);
2827     }
2828     {
2829         char buf[STRLEN];
2830
2831         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2832         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2833                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2834         for (i = 0; i < barsim->nset; i++)
2835         {
2836             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2837             printf("        %s (%d pts)\n", buf, s[i].ndu);
2838         }
2839     }
2840     printf("\n\n");
2841 }
2842
2843 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2844                                  double start_time, double delta_time,
2845                                  lambda_vec_t *native_lambda, double temp,
2846                                  double *last_t, const char *filename)
2847 {
2848     int           i, j;
2849     lambda_vec_t *foreign_lambda;
2850     int           type;
2851     samples_t    *s; /* convenience pointer */
2852     int           startj;
2853
2854     /* check the block types etc. */
2855     if ( (blk->nsub < 3) ||
2856          (blk->sub[0].type != xdr_datatype_int) ||
2857          (blk->sub[1].type != xdr_datatype_double) ||
2858          (
2859              (blk->sub[2].type != xdr_datatype_float) &&
2860              (blk->sub[2].type != xdr_datatype_double)
2861          ) ||
2862          (blk->sub[0].nr < 1) ||
2863          (blk->sub[1].nr < 1) )
2864     {
2865         gmx_fatal(FARGS,
2866                   "Unexpected/corrupted block data in file %s around time %f.",
2867                   filename, start_time);
2868     }
2869
2870     snew(foreign_lambda, 1);
2871     lambda_vec_init(foreign_lambda, native_lambda->lc);
2872     lambda_vec_copy(foreign_lambda, native_lambda);
2873     type = blk->sub[0].ival[0];
2874     if (type == dhbtDH)
2875     {
2876         for (i = 0; i < native_lambda->lc->N; i++)
2877         {
2878             foreign_lambda->val[i] = blk->sub[1].dval[i];
2879         }
2880     }
2881     else
2882     {
2883         if (blk->sub[0].nr > 1)
2884         {
2885             foreign_lambda->dhdl = blk->sub[0].ival[1];
2886         }
2887         else
2888         {
2889             foreign_lambda->dhdl = 0;
2890         }
2891     }
2892
2893     if (!*smp)
2894     {
2895         /* initialize the samples structure if it's empty. */
2896         snew(*smp, 1);
2897         samples_init(*smp, native_lambda, foreign_lambda, temp,
2898                      type == dhbtDHDL, filename);
2899         (*smp)->start_time = start_time;
2900         (*smp)->delta_time = delta_time;
2901     }
2902
2903     /* set convenience pointer */
2904     s = *smp;
2905
2906     /* now double check */
2907     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2908     {
2909         char buf[STRLEN], buf2[STRLEN];
2910         lambda_vec_print(foreign_lambda, buf, FALSE);
2911         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2912         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2913         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2914                   filename, start_time);
2915     }
2916
2917     /* make room for the data */
2918     if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2919     {
2920         s->ndu_alloc += (s->ndu_alloc < static_cast<size_t>(blk->sub[2].nr)) ?
2921             blk->sub[2].nr*2 : s->ndu_alloc;
2922         srenew(s->du_alloc, s->ndu_alloc);
2923         s->du = s->du_alloc;
2924     }
2925     startj   = s->ndu;
2926     s->ndu  += blk->sub[2].nr;
2927     s->ntot += blk->sub[2].nr;
2928     *ndu     = blk->sub[2].nr;
2929
2930     /* and copy the data*/
2931     for (j = 0; j < blk->sub[2].nr; j++)
2932     {
2933         if (blk->sub[2].type == xdr_datatype_float)
2934         {
2935             s->du[startj+j] = blk->sub[2].fval[j];
2936         }
2937         else
2938         {
2939             s->du[startj+j] = blk->sub[2].dval[j];
2940         }
2941     }
2942     if (start_time + blk->sub[2].nr*delta_time > *last_t)
2943     {
2944         *last_t = start_time + blk->sub[2].nr*delta_time;
2945     }
2946 }
2947
2948 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2949                                       double start_time, double delta_time,
2950                                       lambda_vec_t *native_lambda, double temp,
2951                                       double *last_t, const char *filename)
2952 {
2953     int           i, j;
2954     samples_t    *s;
2955     int           nhist;
2956     lambda_vec_t *foreign_lambda;
2957     int           type;
2958     int           nbins[2];
2959
2960     /* check the block types etc. */
2961     if ( (blk->nsub < 2) ||
2962          (blk->sub[0].type != xdr_datatype_double) ||
2963          (blk->sub[1].type != xdr_datatype_int64) ||
2964          (blk->sub[0].nr < 2)  ||
2965          (blk->sub[1].nr < 2) )
2966     {
2967         gmx_fatal(FARGS,
2968                   "Unexpected/corrupted block data in file %s around time %f",
2969                   filename, start_time);
2970     }
2971
2972     nhist = blk->nsub-2;
2973     if (nhist == 0)
2974     {
2975         return nullptr;
2976     }
2977     if (nhist > 2)
2978     {
2979         gmx_fatal(FARGS,
2980                   "Unexpected/corrupted block data in file %s around time %f",
2981                   filename, start_time);
2982     }
2983
2984     snew(s, 1);
2985     *nsamples = 1;
2986
2987     snew(foreign_lambda, 1);
2988     lambda_vec_init(foreign_lambda, native_lambda->lc);
2989     lambda_vec_copy(foreign_lambda, native_lambda);
2990     type = static_cast<int>(blk->sub[1].lval[1]);
2991     if (type == dhbtDH)
2992     {
2993         double old_foreign_lambda;
2994
2995         old_foreign_lambda = blk->sub[0].dval[0];
2996         if (old_foreign_lambda >= 0)
2997         {
2998             foreign_lambda->val[0] = old_foreign_lambda;
2999             if (foreign_lambda->lc->N > 1)
3000             {
3001                 gmx_fatal(FARGS,
3002                           "Single-component lambda in multi-component file %s",
3003                           filename);
3004             }
3005         }
3006         else
3007         {
3008             for (i = 0; i < native_lambda->lc->N; i++)
3009             {
3010                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3011             }
3012         }
3013     }
3014     else
3015     {
3016         if (foreign_lambda->lc->N > 1)
3017         {
3018             if (blk->sub[1].nr < 3 + nhist)
3019             {
3020                 gmx_fatal(FARGS,
3021                           "Missing derivative coord in multi-component file %s",
3022                           filename);
3023             }
3024             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3025         }
3026         else
3027         {
3028             foreign_lambda->dhdl = 0;
3029         }
3030     }
3031
3032     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3033                  filename);
3034     snew(s->hist, 1);
3035
3036     for (i = 0; i < nhist; i++)
3037     {
3038         nbins[i] = blk->sub[i+2].nr;
3039     }
3040
3041     hist_init(s->hist, nhist, nbins);
3042
3043     for (i = 0; i < nhist; i++)
3044     {
3045         s->hist->x0[i] = blk->sub[1].lval[2+i];
3046         s->hist->dx[i] = blk->sub[0].dval[1];
3047         if (i == 1)
3048         {
3049             s->hist->dx[i] = -s->hist->dx[i];
3050         }
3051     }
3052
3053     s->hist->start_time = start_time;
3054     s->hist->delta_time = delta_time;
3055     s->start_time       = start_time;
3056     s->delta_time       = delta_time;
3057
3058     for (i = 0; i < nhist; i++)
3059     {
3060         int64_t     sum = 0;
3061
3062         for (j = 0; j < s->hist->nbin[i]; j++)
3063         {
3064             int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3065
3066             s->hist->bin[i][j] = binv;
3067             sum               += binv;
3068
3069         }
3070         if (i == 0)
3071         {
3072             s->ntot      = sum;
3073             s->hist->sum = sum;
3074         }
3075         else
3076         {
3077             if (s->ntot != sum)
3078             {
3079                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3080                           filename);
3081             }
3082         }
3083     }
3084
3085     if (start_time + s->hist->sum*delta_time > *last_t)
3086     {
3087         *last_t = start_time + s->hist->sum*delta_time;
3088     }
3089     return s;
3090 }
3091
3092
3093 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3094 {
3095     int            i, j;
3096     ener_file_t    fp;
3097     t_enxframe    *fr;
3098     int            nre;
3099     gmx_enxnm_t   *enm           = nullptr;
3100     double         first_t       = -1;
3101     double         last_t        = -1;
3102     samples_t    **samples_rawdh = nullptr; /* contains samples for raw delta_h  */
3103     int           *nhists        = nullptr; /* array to keep count & print at end */
3104     int           *npts          = nullptr; /* array to keep count & print at end */
3105     lambda_vec_t **lambdas       = nullptr; /* array to keep count & print at end */
3106     lambda_vec_t  *native_lambda;
3107     int            nsamples = 0;
3108     lambda_vec_t   start_lambda;
3109
3110     fp = open_enx(fn, "r");
3111     do_enxnms(fp, &nre, &enm);
3112     snew(fr, 1);
3113
3114     snew(native_lambda, 1);
3115     start_lambda.lc  = nullptr;
3116     start_lambda.val = nullptr;
3117
3118     while (do_enx(fp, fr))
3119     {
3120         /* count the data blocks */
3121         int    nblocks_raw  = 0;
3122         int    nblocks_hist = 0;
3123         int    nlam         = 0;
3124         int    k;
3125         /* DHCOLL block information: */
3126         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3127         double rtemp      = 0;
3128
3129         /* count the blocks and handle collection information: */
3130         for (i = 0; i < fr->nblock; i++)
3131         {
3132             if (fr->block[i].id == enxDHHIST)
3133             {
3134                 nblocks_hist++;
3135             }
3136             if (fr->block[i].id == enxDH)
3137             {
3138                 nblocks_raw++;
3139             }
3140             if (fr->block[i].id == enxDHCOLL)
3141             {
3142                 nlam++;
3143                 if ( (fr->block[i].nsub < 1) ||
3144                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3145                      (fr->block[i].sub[0].nr < 5))
3146                 {
3147                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3148                 }
3149
3150                 /* read the data from the DHCOLL block */
3151                 rtemp            = fr->block[i].sub[0].dval[0];
3152                 start_time       = fr->block[i].sub[0].dval[1];
3153                 delta_time       = fr->block[i].sub[0].dval[2];
3154                 old_start_lambda = fr->block[i].sub[0].dval[3];
3155                 delta_lambda     = fr->block[i].sub[0].dval[4];
3156
3157                 if (delta_lambda > 0)
3158                 {
3159                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3160                 }
3161                 if ( ( *temp != rtemp) && (*temp > 0) )
3162                 {
3163                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3164                 }
3165                 *temp = rtemp;
3166
3167                 if (old_start_lambda >= 0)
3168                 {
3169                     if (sd->lc.N > 0)
3170                     {
3171                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3172                         {
3173                             gmx_fatal(FARGS,
3174                                       "lambda vector components in %s don't match those previously read",
3175                                       fn);
3176                         }
3177                     }
3178                     else
3179                     {
3180                         lambda_components_add(&(sd->lc), "", 0);
3181                     }
3182                     if (!start_lambda.lc)
3183                     {
3184                         lambda_vec_init(&start_lambda, &(sd->lc));
3185                     }
3186                     start_lambda.val[0] = old_start_lambda;
3187                 }
3188                 else
3189                 {
3190                     /* read lambda vector */
3191                     int      n_lambda_vec;
3192                     gmx_bool check = (sd->lc.N > 0);
3193                     if (fr->block[i].nsub < 2)
3194                     {
3195                         gmx_fatal(FARGS,
3196                                   "No lambda vector, but start_lambda=%f\n",
3197                                   old_start_lambda);
3198                     }
3199                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3200                     for (j = 0; j < n_lambda_vec; j++)
3201                     {
3202                         const char *name =
3203                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3204                         if (check)
3205                         {
3206                             /* check the components */
3207                             lambda_components_check(&(sd->lc), j, name,
3208                                                     std::strlen(name));
3209                         }
3210                         else
3211                         {
3212                             lambda_components_add(&(sd->lc), name,
3213                                                   std::strlen(name));
3214                         }
3215                     }
3216                     lambda_vec_init(&start_lambda, &(sd->lc));
3217                     start_lambda.index = fr->block[i].sub[1].ival[0];
3218                     for (j = 0; j < n_lambda_vec; j++)
3219                     {
3220                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3221                     }
3222                 }
3223                 if (first_t < 0)
3224                 {
3225                     first_t = start_time;
3226                 }
3227             }
3228         }
3229
3230         if (nlam != 1)
3231         {
3232             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3233         }
3234         if (nblocks_raw > 0 && nblocks_hist > 0)
3235         {
3236             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3237         }
3238
3239         if (nsamples == 0)
3240         {
3241             /* this is the first round; allocate the associated data
3242                structures */
3243             /*native_lambda=start_lambda;*/
3244             lambda_vec_init(native_lambda, &(sd->lc));
3245             lambda_vec_copy(native_lambda, &start_lambda);
3246             nsamples = nblocks_raw+nblocks_hist;
3247             snew(nhists, nsamples);
3248             snew(npts, nsamples);
3249             snew(lambdas, nsamples);
3250             snew(samples_rawdh, nsamples);
3251             for (i = 0; i < nsamples; i++)
3252             {
3253                 nhists[i]        = 0;
3254                 npts[i]          = 0;
3255                 lambdas[i]       = nullptr;
3256                 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3257                                                ones contain values */
3258             }
3259         }
3260         else
3261         {
3262             // nsamples > 0 means this is NOT the first iteration
3263
3264             /* check the native lambda */
3265             if (!lambda_vec_same(&start_lambda, native_lambda) )
3266             {
3267                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3268                           fn, native_lambda->val[0], start_lambda.val[0], start_time);
3269             }
3270             /* check the number of samples against the previous number */
3271             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3272             {
3273                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3274                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3275             }
3276             /* check whether last iterations's end time matches with
3277                the currrent start time */
3278             if ( (std::abs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3279             {
3280                 /* it didn't. We need to store our samples and reallocate */
3281                 for (i = 0; i < nsamples; i++)
3282                 {
3283                     if (samples_rawdh[i])
3284                     {
3285                         /* insert it into the existing list */
3286                         lambda_data_list_insert_sample(sd->lb,
3287                                                        samples_rawdh[i]);
3288                         /* and make sure we'll allocate a new one this time
3289                            around */
3290                         samples_rawdh[i] = nullptr;
3291                     }
3292                 }
3293             }
3294         }
3295
3296         /* and read them */
3297         k = 0; /* counter for the lambdas, etc. arrays */
3298         for (i = 0; i < fr->nblock; i++)
3299         {
3300             if (fr->block[i].id == enxDH)
3301             {
3302                 int type = (fr->block[i].sub[0].ival[0]);
3303                 if (type == dhbtDH || type == dhbtDHDL)
3304                 {
3305                     int ndu;
3306                     read_edr_rawdh_block(&(samples_rawdh[k]),
3307                                          &ndu,
3308                                          &(fr->block[i]),
3309                                          start_time, delta_time,
3310                                          native_lambda, rtemp,
3311                                          &last_t, fn);
3312                     npts[k] += ndu;
3313                     if (samples_rawdh[k])
3314                     {
3315                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3316                     }
3317                     k++;
3318                 }
3319             }
3320             else if (fr->block[i].id == enxDHHIST)
3321             {
3322                 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3323                 if (type == dhbtDH || type == dhbtDHDL)
3324                 {
3325                     int        j;
3326                     int        nb = 0;
3327                     samples_t *s; /* this is where the data will go */
3328                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3329                                             start_time, delta_time,
3330                                             native_lambda, rtemp,
3331                                             &last_t, fn);
3332                     nhists[k] += nb;
3333                     if (nb > 0)
3334                     {
3335                         lambdas[k] = s->foreign_lambda;
3336                     }
3337                     k++;
3338                     /* and insert the new sample immediately */
3339                     for (j = 0; j < nb; j++)
3340                     {
3341                         lambda_data_list_insert_sample(sd->lb, s+j);
3342                     }
3343                 }
3344             }
3345         }
3346     }
3347     /* Now store all our extant sample collections */
3348     for (i = 0; i < nsamples; i++)
3349     {
3350         if (samples_rawdh[i])
3351         {
3352             /* insert it into the existing list */
3353             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3354         }
3355     }
3356
3357
3358     {
3359         char buf[STRLEN];
3360         printf("\n");
3361         lambda_vec_print(native_lambda, buf, FALSE);
3362         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3363                fn, first_t, last_t, buf);
3364         for (i = 0; i < nsamples; i++)
3365         {
3366             if (lambdas[i])
3367             {
3368                 lambda_vec_print(lambdas[i], buf, TRUE);
3369                 if (nhists[i] > 0)
3370                 {
3371                     printf("        %s (%d hists)\n", buf, nhists[i]);
3372                 }
3373                 else
3374                 {
3375                     printf("        %s (%d pts)\n", buf, npts[i]);
3376                 }
3377             }
3378         }
3379     }
3380     printf("\n\n");
3381     sfree(npts);
3382     sfree(nhists);
3383     sfree(lambdas);
3384 }
3385
3386
3387 int gmx_bar(int argc, char *argv[])
3388 {
3389     static const char *desc[] = {
3390         "[THISMODULE] calculates free energy difference estimates through ",
3391         "Bennett's acceptance ratio method (BAR). It also automatically",
3392         "adds series of individual free energies obtained with BAR into",
3393         "a combined free energy estimate.[PAR]",
3394
3395         "Every individual BAR free energy difference relies on two ",
3396         "simulations at different states: say state A and state B, as",
3397         "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3398         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3399         "average of the Hamiltonian difference of state B given state A and",
3400         "vice versa.",
3401         "The energy differences to the other state must be calculated",
3402         "explicitly during the simulation. This can be done with",
3403         "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3404
3405         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3406         "Two types of input files are supported:",
3407         "",
3408         " * Files with more than one [IT]y[it]-value. ",
3409         "   The files should have columns ",
3410         "   with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3411         "   The [GRK]lambda[grk] values are inferred ",
3412         "   from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3413         "   dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3414         "   legends of Delta H",
3415         " * Files with only one [IT]y[it]-value. Using the",
3416         "   [TT]-extp[tt] option for these files, it is assumed",
3417         "   that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3418         "   Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3419         "   The [GRK]lambda[grk] value of the simulation is inferred from the ",
3420         "   subtitle (if present), otherwise from a number in the subdirectory ",
3421         "   in the file name.",
3422         "",
3423
3424         "The [GRK]lambda[grk] of the simulation is parsed from ",
3425         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3426         "foreign [GRK]lambda[grk] values from the legend containing the ",
3427         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3428         "the legend line containing 'T ='.[PAR]",
3429
3430         "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3431         "These can contain either lists of energy differences (see the ",
3432         "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3433         "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3434         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3435         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3436
3437         "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3438         "the energy difference can also be extrapolated from the ",
3439         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3440         "option, which assumes that the system's Hamiltonian depends linearly",
3441         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3442
3443         "The free energy estimates are determined using BAR with bisection, ",
3444         "with the precision of the output set with [TT]-prec[tt]. ",
3445         "An error estimate taking into account time correlations ",
3446         "is made by splitting the data into blocks and determining ",
3447         "the free energy differences over those blocks and assuming ",
3448         "the blocks are independent. ",
3449         "The final error estimate is determined from the average variance ",
3450         "over 5 blocks. A range of block numbers for error estimation can ",
3451         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3452
3453         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3454         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3455         "samples. [BB]Note[bb] that when aggregating energy ",
3456         "differences/derivatives with different sampling intervals, this is ",
3457         "almost certainly not correct. Usually subsequent energies are ",
3458         "correlated and different time intervals mean different degrees ",
3459         "of correlation between samples.[PAR]",
3460
3461         "The results are split in two parts: the last part contains the final ",
3462         "results in kJ/mol, together with the error estimate for each part ",
3463         "and the total. The first part contains detailed free energy ",
3464         "difference estimates and phase space overlap measures in units of ",
3465         "kT (together with their computed error estimate). The printed ",
3466         "values are:",
3467         "",
3468         " * lam_A: the [GRK]lambda[grk] values for point A.",
3469         " * lam_B: the [GRK]lambda[grk] values for point B.",
3470         " *    DG: the free energy estimate.",
3471         " *   s_A: an estimate of the relative entropy of B in A.",
3472         " *   s_B: an estimate of the relative entropy of A in B.",
3473         " * stdev: an estimate expected per-sample standard deviation.",
3474         "",
3475
3476         "The relative entropy of both states in each other's ensemble can be ",
3477         "interpreted as a measure of phase space overlap: ",
3478         "the relative entropy s_A of the work samples of lambda_B in the ",
3479         "ensemble of lambda_A (and vice versa for s_B), is a ",
3480         "measure of the 'distance' between Boltzmann distributions of ",
3481         "the two states, that goes to zero for identical distributions. See ",
3482         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3483         "[PAR]",
3484         "The estimate of the expected per-sample standard deviation, as given ",
3485         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3486         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3487         "of the actual statistical error, because it assumes independent samples).[PAR]",
3488
3489         "To get a visual estimate of the phase space overlap, use the ",
3490         "[TT]-oh[tt] option to write series of histograms, together with the ",
3491         "[TT]-nbin[tt] option.[PAR]"
3492     };
3493     static real        begin    = 0, end = -1, temp = -1;
3494     int                nd       = 2, nbmin = 5, nbmax = 5;
3495     int                nbin     = 100;
3496     gmx_bool           use_dhdl = FALSE;
3497     t_pargs            pa[]     = {
3498         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3499         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3500         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3501         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3502         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3503         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3504         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3505         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3506     };
3507
3508     t_filenm           fnm[] = {
3509         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3510         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3511         { efXVG, "-o",  "bar",    ffOPTWR },
3512         { efXVG, "-oi", "barint", ffOPTWR },
3513         { efXVG, "-oh", "histogram", ffOPTWR }
3514     };
3515 #define NFILE asize(fnm)
3516
3517     int               f;
3518     int               nf = 0;    /* file counter */
3519     int               nfile_tot; /* total number of input files */
3520     sim_data_t        sim_data;  /* the simulation data */
3521     barres_t         *results;   /* the results */
3522     int               nresults;  /* number of results in results array */
3523
3524     double           *partsum;
3525     double            prec, dg_tot;
3526     FILE             *fpb, *fpi;
3527     char              dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3528     char              buf[STRLEN], buf2[STRLEN];
3529     char              ktformat[STRLEN], sktformat[STRLEN];
3530     char              kteformat[STRLEN], skteformat[STRLEN];
3531     gmx_output_env_t *oenv;
3532     double            kT;
3533     gmx_bool          result_OK = TRUE, bEE = TRUE;
3534
3535     gmx_bool          disc_err          = FALSE;
3536     double            sum_disc_err      = 0.; /* discretization error */
3537     gmx_bool          histrange_err     = FALSE;
3538     double            sum_histrange_err = 0.; /* histogram range error */
3539     double            stat_err          = 0.; /* statistical error */
3540
3541     if (!parse_common_args(&argc, argv,
3542                            PCA_CAN_VIEW,
3543                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3544     {
3545         return 0;
3546     }
3547
3548     gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3549     gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3550
3551     sim_data_init(&sim_data);
3552 #if 0
3553     /* make linked list */
3554     lb = &lambda_head;
3555     lambda_data_init(lb, 0, 0);
3556     lb->next = lb;
3557     lb->prev = lb;
3558 #endif
3559
3560
3561     nfile_tot = xvgFiles.size() + edrFiles.size();
3562
3563     if (nfile_tot == 0)
3564     {
3565         gmx_fatal(FARGS, "No input files!");
3566     }
3567
3568     if (nd < 0)
3569     {
3570         gmx_fatal(FARGS, "Can not have negative number of digits");
3571     }
3572     prec = std::pow(10.0, static_cast<double>(-nd));
3573
3574     snew(partsum, (nbmax+1)*(nbmax+1));
3575     nf = 0;
3576
3577     /* read in all files. First xvg files */
3578     for (const std::string &filenm : xvgFiles)
3579     {
3580         read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3581         nf++;
3582     }
3583     /* then .edr files */
3584     for (const std::string &filenm : edrFiles)
3585     {
3586         read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3587         nf++;
3588     }
3589
3590     /* fix the times to allow for equilibration */
3591     sim_data_impose_times(&sim_data, begin, end);
3592
3593     if (opt2bSet("-oh", NFILE, fnm))
3594     {
3595         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3596     }
3597
3598     /* assemble the output structures from the lambdas */
3599     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3600
3601     sum_disc_err = barres_list_max_disc_err(results, nresults);
3602
3603     if (nresults == 0)
3604     {
3605         printf("\nNo results to calculate.\n");
3606         return 0;
3607     }
3608
3609     if (sum_disc_err > prec)
3610     {
3611         prec = sum_disc_err;
3612         nd   = static_cast<int>(std::ceil(-std::log10(prec)));
3613         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3614     }
3615
3616
3617     /*sprintf(lamformat,"%%6.3f");*/
3618     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3619     /* the format strings of the results in kT */
3620     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3621     sprintf( sktformat, "%%%ds", 6+nd);
3622     /* the format strings of the errors in kT */
3623     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3624     sprintf( skteformat, "%%%ds", 4+nd);
3625     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3626     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3627
3628
3629
3630     fpb = nullptr;
3631     if (opt2bSet("-o", NFILE, fnm))
3632     {
3633         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3634         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3635                             "\\lambda", buf, exvggtXYDY, oenv);
3636     }
3637
3638     fpi = nullptr;
3639     if (opt2bSet("-oi", NFILE, fnm))
3640     {
3641         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3642         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3643                        "\\lambda", buf, oenv);
3644     }
3645
3646
3647
3648     if (nbmin > nbmax)
3649     {
3650         nbmin = nbmax;
3651     }
3652
3653     /* first calculate results */
3654     bEE      = TRUE;
3655     disc_err = FALSE;
3656     for (f = 0; f < nresults; f++)
3657     {
3658         /* Determine the free energy difference with a factor of 10
3659          * more accuracy than requested for printing.
3660          */
3661         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3662                  &bEE, partsum);
3663
3664         if (results[f].dg_disc_err > prec/10.)
3665         {
3666             disc_err = TRUE;
3667         }
3668         if (results[f].dg_histrange_err > prec/10.)
3669         {
3670             histrange_err = TRUE;
3671         }
3672     }
3673
3674     /* print results in kT */
3675     kT   = BOLTZ*temp;
3676
3677     printf("\nTemperature: %g K\n", temp);
3678
3679     printf("\nDetailed results in kT (see help for explanation):\n\n");
3680     printf("%6s ", " lam_A");
3681     printf("%6s ", " lam_B");
3682     printf(sktformat,  "DG ");
3683     if (bEE)
3684     {
3685         printf(skteformat, "+/- ");
3686     }
3687     if (disc_err)
3688     {
3689         printf(skteformat, "disc ");
3690     }
3691     if (histrange_err)
3692     {
3693         printf(skteformat, "range ");
3694     }
3695     printf(sktformat,  "s_A ");
3696     if (bEE)
3697     {
3698         printf(skteformat, "+/- " );
3699     }
3700     printf(sktformat,  "s_B ");
3701     if (bEE)
3702     {
3703         printf(skteformat, "+/- " );
3704     }
3705     printf(sktformat,  "stdev ");
3706     if (bEE)
3707     {
3708         printf(skteformat, "+/- ");
3709     }
3710     printf("\n");
3711     for (f = 0; f < nresults; f++)
3712     {
3713         lambda_vec_print_short(results[f].a->native_lambda, buf);
3714         printf("%s ", buf);
3715         lambda_vec_print_short(results[f].b->native_lambda, buf);
3716         printf("%s ", buf);
3717         printf(ktformat,  results[f].dg);
3718         printf(" ");
3719         if (bEE)
3720         {
3721             printf(kteformat, results[f].dg_err);
3722             printf(" ");
3723         }
3724         if (disc_err)
3725         {
3726             printf(kteformat, results[f].dg_disc_err);
3727             printf(" ");
3728         }
3729         if (histrange_err)
3730         {
3731             printf(kteformat, results[f].dg_histrange_err);
3732             printf(" ");
3733         }
3734         printf(ktformat,  results[f].sa);
3735         printf(" ");
3736         if (bEE)
3737         {
3738             printf(kteformat, results[f].sa_err);
3739             printf(" ");
3740         }
3741         printf(ktformat,  results[f].sb);
3742         printf(" ");
3743         if (bEE)
3744         {
3745             printf(kteformat, results[f].sb_err);
3746             printf(" ");
3747         }
3748         printf(ktformat,  results[f].dg_stddev);
3749         printf(" ");
3750         if (bEE)
3751         {
3752             printf(kteformat, results[f].dg_stddev_err);
3753         }
3754         printf("\n");
3755
3756         /* Check for negative relative entropy with a 95% certainty. */
3757         if (results[f].sa < -2*results[f].sa_err ||
3758             results[f].sb < -2*results[f].sb_err)
3759         {
3760             result_OK = FALSE;
3761         }
3762     }
3763
3764     if (!result_OK)
3765     {
3766         printf("\nWARNING: Some of these results violate the Second Law of "
3767                "Thermodynamics: \n"
3768                "         This is can be the result of severe undersampling, or "
3769                "(more likely)\n"
3770                "         there is something wrong with the simulations.\n");
3771     }
3772
3773
3774     /* final results in kJ/mol */
3775     printf("\n\nFinal results in kJ/mol:\n\n");
3776     dg_tot  = 0;
3777     for (f = 0; f < nresults; f++)
3778     {
3779
3780         if (fpi != nullptr)
3781         {
3782             lambda_vec_print_short(results[f].a->native_lambda, buf);
3783             fprintf(fpi, xvg2format, buf, dg_tot);
3784         }
3785
3786
3787         if (fpb != nullptr)
3788         {
3789             lambda_vec_print_intermediate(results[f].a->native_lambda,
3790                                           results[f].b->native_lambda,
3791                                           buf);
3792
3793             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3794         }
3795
3796         printf("point ");
3797         lambda_vec_print_short(results[f].a->native_lambda, buf);
3798         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3799         printf("%s - %s", buf, buf2);
3800         printf(",   DG ");
3801
3802         printf(dgformat, results[f].dg*kT);
3803         if (bEE)
3804         {
3805             printf(" +/- ");
3806             printf(dgformat, results[f].dg_err*kT);
3807         }
3808         if (histrange_err)
3809         {
3810             printf(" (max. range err. = ");
3811             printf(dgformat, results[f].dg_histrange_err*kT);
3812             printf(")");
3813             sum_histrange_err += results[f].dg_histrange_err*kT;
3814         }
3815
3816         printf("\n");
3817         dg_tot += results[f].dg;
3818     }
3819     printf("\n");
3820     printf("total ");
3821     lambda_vec_print_short(results[0].a->native_lambda, buf);
3822     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3823     printf("%s - %s", buf, buf2);
3824     printf(",   DG ");
3825
3826     printf(dgformat, dg_tot*kT);
3827     if (bEE)
3828     {
3829         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3830         printf(" +/- ");
3831         printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3832     }
3833     printf("\n");
3834     if (disc_err)
3835     {
3836         printf("\nmaximum discretization error = ");
3837         printf(dgformat, sum_disc_err);
3838         if (bEE && stat_err < sum_disc_err)
3839         {
3840             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3841         }
3842     }
3843     if (histrange_err)
3844     {
3845         printf("\nmaximum histogram range error = ");
3846         printf(dgformat, sum_histrange_err);
3847         if (bEE && stat_err < sum_histrange_err)
3848         {
3849             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3850         }
3851
3852     }
3853     printf("\n");
3854
3855
3856     if (fpi != nullptr)
3857     {
3858         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3859         fprintf(fpi, xvg2format, buf, dg_tot);
3860         xvgrclose(fpi);
3861     }
3862     if (fpb != nullptr)
3863     {
3864         xvgrclose(fpb);
3865     }
3866
3867     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3868     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3869
3870     return 0;
3871 }