src/gromacs/gmxana/gmx_bar.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010-2018, The GROMACS development team.
   5  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   6  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7  * and including many others, as listed in the AUTHORS file in the
   8  * top-level source directory and at http://www.gromacs.org.
   9  *
  10  * GROMACS is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public License
  12  * as published by the Free Software Foundation; either version 2.1
  13  * of the License, or (at your option) any later version.
  14  *
  15  * GROMACS is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with GROMACS; if not, see
  22  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24  *
  25  * If you want to redistribute modifications to GROMACS, please
  26  * consider that scientific software is very special. Version
  27  * control is crucial - bugs must be traceable. We will be happy to
  28  * consider code for inclusion in the official distribution, but
  29  * derived work must not be called official GROMACS. Details are found
  30  * in the README & COPYING files - if they are missing, get the
  31  * official version at http://www.gromacs.org.
  32  *
  33  * To help us fund GROMACS development, we humbly ask that you cite
  34  * the research papers on the package. Check out http://www.gromacs.org.
  35  */
  36 #include "gmxpre.h"
  37
  38 #include <cctype>
  39 #include <cmath>
  40 #include <cstdlib>
  41 #include <cstring>
  42
  43 #include <algorithm>
  44 #include <limits>
  45 #include <vector>
  46
  47 #include "gromacs/commandline/pargs.h"
  48 #include "gromacs/commandline/viewit.h"
  49 #include "gromacs/fileio/enxio.h"
  50 #include "gromacs/fileio/xvgr.h"
  51 #include "gromacs/gmxana/gmx_ana.h"
  52 #include "gromacs/math/units.h"
  53 #include "gromacs/math/utilities.h"
  54 #include "gromacs/mdlib/energyoutput.h"
  55 #include "gromacs/mdtypes/md_enums.h"
  56 #include "gromacs/trajectory/energyframe.h"
  57 #include "gromacs/utility/arraysize.h"
  58 #include "gromacs/utility/cstringutil.h"
  59 #include "gromacs/utility/dir_separator.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/gmxassert.h"
  62 #include "gromacs/utility/smalloc.h"
  63 #include "gromacs/utility/snprintf.h"
  64
  65
  66 /* Structure for the names of lambda vector components */
  67 typedef struct lambda_components_t
  68 {
  69     char** names; /* Array of strings with names for the lambda vector
  70                      components */
  71     int N;        /* The number of components */
  72     int Nalloc;   /* The number of allocated components */
  73 } lambda_components_t;
  74
  75 /* Structure for a lambda vector or a dhdl derivative direction */
  76 typedef struct lambda_vec_t
  77 {
  78     double* val;                   /* The lambda vector component values. Only valid if
  79                                       dhdl == -1 */
  80     int dhdl;                      /* The coordinate index for the derivative described by this
  81                                       structure, or -1 */
  82     const lambda_components_t* lc; /* the associated lambda_components
  83                                       structure */
  84     int index;                     /* The state number (init-lambda-state) of this lambda
  85                                       vector, if known. If not, it is set to -1 */
  86 } lambda_vec_t;
  87
  88 /* the dhdl.xvg data from a simulation */
  89 typedef struct xvg_t
  90 {
  91     const char*   filename;
  92     int           ftp;          /* file type */
  93     int           nset;         /* number of lambdas, including dhdl */
  94     int*          np;           /* number of data points (du or hists) per lambda */
  95     int           np_alloc;     /* number of points (du or hists) allocated */
  96     double        temp;         /* temperature */
  97     lambda_vec_t* lambda;       /* the lambdas (of first index for y). */
  98     double*       t;            /* the times (of second index for y) */
  99     double**      y;            /* the dU values. y[0] holds the derivative, while
 100                                    further ones contain the energy differences between
 101                                    the native lambda and the 'foreign' lambdas. */
 102     lambda_vec_t native_lambda; /* the native lambda */
 103
 104     struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
 105 } xvg_t;
 106
 107
 108 typedef struct hist_t
 109 {
 110     unsigned int* bin[2]; /* the (forward + reverse) histogram values */
 111     double        dx[2];  /* the histogram spacing. The reverse
 112                              dx is the negative of the forward dx.*/
 113     int64_t x0[2];        /* the (forward + reverse) histogram start
 114                                      point(s) as int */
 115
 116     int     nbin[2]; /* the (forward+reverse) number of bins */
 117     int64_t sum;     /* the total number of counts. Must be
 118                                 the same for forward + reverse.  */
 119     int nhist;       /* number of hist datas (forward or reverse) */
 120
 121     double start_time, delta_time; /* start time, end time of histogram */
 122 } hist_t;
 123
 124
 125 /* an aggregate of samples for partial free energy calculation */
 126 typedef struct samples_t
 127 {
 128     lambda_vec_t* native_lambda;  /* pointer to native lambda vector */
 129     lambda_vec_t* foreign_lambda; /* pointer to foreign lambda vector */
 130     double        temp;           /* the temperature */
 131     gmx_bool      derivative;     /* whether this sample is a derivative */
 132
 133     /* The samples come either as either delta U lists: */
 134     int     ndu;                    /* the number of delta U samples */
 135     double* du;                     /* the delta u's */
 136     double* t;                      /* the times associated with those samples, or: */
 137     double  start_time, delta_time; /*start time and delta time for linear time*/
 138
 139     /* or as histograms: */
 140     hist_t* hist; /* a histogram */
 141
 142     /* allocation data: (not NULL for data 'owned' by this struct) */
 143     double *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 144     size_t  ndu_alloc, nt_alloc; /* pre-allocated sizes */
 145     hist_t* hist_alloc;          /* allocated hist */
 146
 147     int64_t     ntot;     /* total number of samples */
 148     const char* filename; /* the file name this sample comes from */
 149 } samples_t;
 150
 151 /* a sample range (start to end for du-style data, or boolean
 152     for both du-style data and histograms */
 153 typedef struct sample_range_t
 154 {
 155     int      start, end; /* start and end index for du style data */
 156     gmx_bool use;        /* whether to use this sample */
 157
 158     samples_t* s; /* the samples this range belongs to */
 159 } sample_range_t;
 160
 161
 162 /* a collection of samples for a partial free energy calculation
 163     (i.e. the collection of samples from one native lambda to one
 164     foreign lambda) */
 165 typedef struct sample_coll_t
 166 {
 167     lambda_vec_t* native_lambda;  /* these should be the same for all samples
 168                                      in the histogram */
 169     lambda_vec_t* foreign_lambda; /* collection */
 170     double        temp;           /* the temperature */
 171
 172     int             nsamples;       /* the number of samples */
 173     samples_t**     s;              /* the samples themselves */
 174     sample_range_t* r;              /* the sample ranges */
 175     int             nsamples_alloc; /* number of allocated samples */
 176
 177     int64_t ntot; /* total number of samples in the ranges of
 178                              this collection */
 179
 180     struct sample_coll_t *next, *prev; /* next and previous in the list */
 181 } sample_coll_t;
 182
 183 /* all the samples associated with a lambda point */
 184 typedef struct lambda_data_t
 185 {
 186     lambda_vec_t* lambda; /* the native lambda (at start time if dynamic) */
 187     double        temp;   /* temperature */
 188
 189     sample_coll_t* sc; /* the samples */
 190
 191     sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
 192
 193     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 194 } lambda_data_t;
 195
 196 /* Top-level data structure of simulation data */
 197 typedef struct sim_data_t
 198 {
 199     lambda_data_t* lb;      /* a lambda data linked list */
 200     lambda_data_t  lb_head; /* The head element of the linked list */
 201
 202     lambda_components_t lc; /* the allowed components of the lambda
 203                                vectors */
 204 } sim_data_t;
 205
 206 /* Top-level data structure with calculated values. */
 207 typedef struct
 208 {
 209     sample_coll_t *a, *b; /* the simulation data */
 210
 211     double dg;     /* the free energy difference */
 212     double dg_err; /* the free energy difference */
 213
 214     double dg_disc_err;      /* discretization error */
 215     double dg_histrange_err; /* histogram range error */
 216
 217     double sa;     /* relative entropy of b in state a */
 218     double sa_err; /* error in sa */
 219     double sb;     /* relative entropy of a in state b */
 220     double sb_err; /* error in sb */
 221
 222     double dg_stddev;     /* expected dg stddev per sample */
 223     double dg_stddev_err; /* error in dg_stddev */
 224 } barres_t;
 225
 226
 227 /* Initialize a lambda_components structure */
 228 static void lambda_components_init(lambda_components_t* lc)
 229 {
 230     lc->N      = 0;
 231     lc->Nalloc = 2;
 232     snew(lc->names, lc->Nalloc);
 233 }
 234
 235 /* Add a component to a lambda_components structure */
 236 static void lambda_components_add(lambda_components_t* lc, const char* name, size_t name_length)
 237 {
 238     while (lc->N + 1 > lc->Nalloc)
 239     {
 240         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2 * lc->Nalloc;
 241         srenew(lc->names, lc->Nalloc);
 242     }
 243     snew(lc->names[lc->N], name_length + 1);
 244     std::strncpy(lc->names[lc->N], name, name_length);
 245     lc->N++;
 246 }
 247
 248 /* check whether a component with index 'index' matches the given name, or
 249    is also NULL. Returns TRUE if this is the case.
 250    the string name does not need to end */
 251 static gmx_bool lambda_components_check(const lambda_components_t* lc, int index, const char* name, size_t name_length)
 252 {
 253     size_t len;
 254     if (!lc || index >= lc->N)
 255     {
 256         return FALSE;
 257     }
 258     if ((name == nullptr) && (lc->names[index] == nullptr))
 259     {
 260         return TRUE;
 261     }
 262     if (((name != nullptr) && (lc->names[index] == nullptr))
 263         || ((name == nullptr) && (lc->names[index] != nullptr)))
 264     {
 265         return FALSE;
 266     }
 267     GMX_RELEASE_ASSERT(
 268             (name != nullptr) || (name_length == 0),
 269             "If name is empty, the length of the substring to examine within it must be zero");
 270     len = std::strlen(lc->names[index]);
 271     if (len != name_length)
 272     {
 273         return FALSE;
 274     }
 275     if (name_length == 0)
 276     {
 277         // Everything matches a zero-length substring. This branch is
 278         // needed because name could in principle be nullptr.
 279         return TRUE;
 280     }
 281     return std::strncmp(lc->names[index], name, name_length) == 0;
 282 }
 283
 284 /* Find the index of a given lambda component name, or -1 if not found */
 285 static int lambda_components_find(const lambda_components_t* lc, const char* name, size_t name_length)
 286 {
 287     int i;
 288
 289     for (i = 0; i < lc->N; i++)
 290     {
 291         if (std::strncmp(lc->names[i], name, name_length) == 0)
 292         {
 293             return i;
 294         }
 295     }
 296     return -1;
 297 }
 298
 299
 300 /* initialize a lambda vector */
 301 static void lambda_vec_init(lambda_vec_t* lv, const lambda_components_t* lc)
 302 {
 303     snew(lv->val, lc->N);
 304     lv->index = -1;
 305     lv->dhdl  = -1;
 306     lv->lc    = lc;
 307 }
 308
 309 static void lambda_vec_copy(lambda_vec_t* lv, const lambda_vec_t* orig)
 310 {
 311     int i;
 312
 313     lambda_vec_init(lv, orig->lc);
 314     lv->dhdl  = orig->dhdl;
 315     lv->index = orig->index;
 316     for (i = 0; i < lv->lc->N; i++)
 317     {
 318         lv->val[i] = orig->val[i];
 319     }
 320 }
 321
 322 /* write a lambda vec to a preallocated string */
 323 static void lambda_vec_print(const lambda_vec_t* lv, char* str, gmx_bool named)
 324 {
 325     int i;
 326
 327     str[0] = 0; /* reset the string */
 328     if (lv->dhdl < 0)
 329     {
 330         if (named)
 331         {
 332             str += sprintf(str, "delta H to ");
 333         }
 334         if (lv->lc->N > 1)
 335         {
 336             str += sprintf(str, "(");
 337         }
 338         for (i = 0; i < lv->lc->N; i++)
 339         {
 340             str += sprintf(str, "%g", lv->val[i]);
 341             if (i < lv->lc->N - 1)
 342             {
 343                 str += sprintf(str, ", ");
 344             }
 345         }
 346         if (lv->lc->N > 1)
 347         {
 348             sprintf(str, ")");
 349         }
 350     }
 351     else
 352     {
 353         /* this lambda vector describes a derivative */
 354         str += sprintf(str, "dH/dl");
 355         if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
 356         {
 357             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 358         }
 359     }
 360 }
 361
 362 /* write a shortened version of the lambda vec to a preallocated string */
 363 static void lambda_vec_print_short(const lambda_vec_t* lv, char* str)
 364 {
 365     if (lv->index >= 0)
 366     {
 367         sprintf(str, "%6d", lv->index);
 368     }
 369     else
 370     {
 371         if (lv->dhdl < 0)
 372         {
 373             sprintf(str, "%6.3f", lv->val[0]);
 374         }
 375         else
 376         {
 377             sprintf(str, "dH/dl[%d]", lv->dhdl);
 378         }
 379     }
 380 }
 381
 382 /* write an intermediate version of two lambda vecs to a preallocated string */
 383 static void lambda_vec_print_intermediate(const lambda_vec_t* a, const lambda_vec_t* b, char* str)
 384 {
 385     str[0] = 0;
 386     if ((a->index >= 0) && (b->index >= 0))
 387     {
 388         sprintf(str, "%6.3f", (a->index + b->index) / 2.0);
 389     }
 390     else
 391     {
 392         if ((a->dhdl < 0) && (b->dhdl < 0))
 393         {
 394             sprintf(str, "%6.3f", (a->val[0] + b->val[0]) / 2.0);
 395         }
 396     }
 397 }
 398
 399
 400 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 401    a and b must describe non-derivative lambda points */
 402 static double lambda_vec_abs_diff(const lambda_vec_t* a, const lambda_vec_t* b)
 403 {
 404     int    i;
 405     double ret = 0.;
 406
 407     if ((a->dhdl > 0) || (b->dhdl > 0))
 408     {
 409         gmx_fatal(
 410                 FARGS,
 411                 "Trying to calculate the difference between derivatives instead of lambda points");
 412     }
 413     if (a->lc != b->lc)
 414     {
 415         gmx_fatal(FARGS, "Trying to calculate the difference lambdas with differing basis set");
 416     }
 417     for (i = 0; i < a->lc->N; i++)
 418     {
 419         double df = a->val[i] - b->val[i];
 420         ret += df * df;
 421     }
 422     return std::sqrt(ret);
 423 }
 424
 425
 426 /* check whether two lambda vectors are the same */
 427 static gmx_bool lambda_vec_same(const lambda_vec_t* a, const lambda_vec_t* b)
 428 {
 429     int i;
 430
 431     if (a->lc != b->lc)
 432     {
 433         return FALSE;
 434     }
 435     if (a->dhdl < 0)
 436     {
 437         for (i = 0; i < a->lc->N; i++)
 438         {
 439             if (!gmx_within_tol(a->val[i], b->val[i], 10 * GMX_REAL_EPS))
 440             {
 441                 return FALSE;
 442             }
 443         }
 444         return TRUE;
 445     }
 446     else
 447     {
 448         /* they're derivatives, so we check whether the indices match */
 449         return (a->dhdl == b->dhdl);
 450     }
 451 }
 452
 453 /* Compare the sort order of two foreign lambda vectors
 454
 455     returns 1 if a is 'bigger' than b,
 456     returns 0 if they're the same,
 457     returns -1 if a is 'smaller' than b.*/
 458 static int lambda_vec_cmp_foreign(const lambda_vec_t* a, const lambda_vec_t* b)
 459 {
 460     int      i;
 461     double   norm_a = 0, norm_b = 0;
 462     gmx_bool different = FALSE;
 463
 464     if (a->lc != b->lc)
 465     {
 466         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 467     }
 468     /* if either one has an index we sort based on that */
 469     if ((a->index >= 0) || (b->index >= 0))
 470     {
 471         if (a->index == b->index)
 472         {
 473             return 0;
 474         }
 475         return (a->index > b->index) ? 1 : -1;
 476     }
 477     if (a->dhdl >= 0 || b->dhdl >= 0)
 478     {
 479         /* lambda vectors that are derivatives always sort higher than those
 480            without derivatives */
 481         if ((a->dhdl >= 0) != (b->dhdl >= 0))
 482         {
 483             return (a->dhdl >= 0) ? 1 : -1;
 484         }
 485         return 0;
 486     }
 487
 488     /* neither has an index, so we can only sort on the lambda components,
 489        which is only valid if there is one component */
 490     for (i = 0; i < a->lc->N; i++)
 491     {
 492         if (!gmx_within_tol(a->val[i], b->val[i], 10 * GMX_REAL_EPS))
 493         {
 494             different = TRUE;
 495         }
 496         norm_a += a->val[i] * a->val[i];
 497         norm_b += b->val[i] * b->val[i];
 498     }
 499     if (!different)
 500     {
 501         return 0;
 502     }
 503     return (norm_a > norm_b) ? 1 : -1;
 504 }
 505
 506 /* Compare the sort order of two native lambda vectors
 507
 508     returns 1 if a is 'bigger' than b,
 509     returns 0 if they're the same,
 510     returns -1 if a is 'smaller' than b.*/
 511 static int lambda_vec_cmp_native(const lambda_vec_t* a, const lambda_vec_t* b)
 512 {
 513     if (a->lc != b->lc)
 514     {
 515         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 516     }
 517     /* if either one has an index we sort based on that */
 518     if ((a->index >= 0) || (b->index >= 0))
 519     {
 520         if (a->index == b->index)
 521         {
 522             return 0;
 523         }
 524         return (a->index > b->index) ? 1 : -1;
 525     }
 526     /* neither has an index, so we can only sort on the lambda components,
 527        which is only valid if there is one component */
 528     if (a->lc->N > 1)
 529     {
 530         gmx_fatal(FARGS, "Can't compare lambdas with no index and > 1 component");
 531     }
 532     if (a->dhdl >= 0 || b->dhdl >= 0)
 533     {
 534         gmx_fatal(FARGS, "Can't compare native lambdas that are derivatives");
 535     }
 536     if (gmx_within_tol(a->val[0], b->val[0], 10 * GMX_REAL_EPS))
 537     {
 538         return 0;
 539     }
 540     return a->val[0] > b->val[0] ? 1 : -1;
 541 }
 542
 543
 544 static void hist_init(hist_t* h, int nhist, int* nbin)
 545 {
 546     int i;
 547     if (nhist > 2)
 548     {
 549         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 550     }
 551     for (i = 0; i < nhist; i++)
 552     {
 553         snew(h->bin[i], nbin[i]);
 554         h->x0[i]      = 0;
 555         h->nbin[i]    = nbin[i];
 556         h->start_time = h->delta_time = 0;
 557         h->dx[i]                      = 0;
 558     }
 559     h->sum   = 0;
 560     h->nhist = nhist;
 561 }
 562
 563 static void xvg_init(xvg_t* ba)
 564 {
 565     ba->filename = nullptr;
 566     ba->nset     = 0;
 567     ba->np_alloc = 0;
 568     ba->np       = nullptr;
 569     ba->y        = nullptr;
 570 }
 571
 572 static void samples_init(samples_t*    s,
 573                          lambda_vec_t* native_lambda,
 574                          lambda_vec_t* foreign_lambda,
 575                          double        temp,
 576                          gmx_bool      derivative,
 577                          const char*   filename)
 578 {
 579     s->native_lambda  = native_lambda;
 580     s->foreign_lambda = foreign_lambda;
 581     s->temp           = temp;
 582     s->derivative     = derivative;
 583
 584     s->ndu        = 0;
 585     s->du         = nullptr;
 586     s->t          = nullptr;
 587     s->start_time = s->delta_time = 0;
 588     s->hist                       = nullptr;
 589     s->du_alloc                   = nullptr;
 590     s->t_alloc                    = nullptr;
 591     s->hist_alloc                 = nullptr;
 592     s->ndu_alloc                  = 0;
 593     s->nt_alloc                   = 0;
 594
 595     s->ntot     = 0;
 596     s->filename = filename;
 597 }
 598
 599 static void sample_range_init(sample_range_t* r, samples_t* s)
 600 {
 601     r->start = 0;
 602     r->end   = s->ndu;
 603     r->use   = TRUE;
 604     r->s     = nullptr;
 605 }
 606
 607 static void sample_coll_init(sample_coll_t* sc, lambda_vec_t* native_lambda, lambda_vec_t* foreign_lambda, double temp)
 608 {
 609     sc->native_lambda  = native_lambda;
 610     sc->foreign_lambda = foreign_lambda;
 611     sc->temp           = temp;
 612
 613     sc->nsamples       = 0;
 614     sc->s              = nullptr;
 615     sc->r              = nullptr;
 616     sc->nsamples_alloc = 0;
 617
 618     sc->ntot = 0;
 619     sc->next = sc->prev = nullptr;
 620 }
 621
 622 static void sample_coll_destroy(sample_coll_t* sc)
 623 {
 624     /* don't free the samples themselves */
 625     sfree(sc->r);
 626     sfree(sc->s);
 627 }
 628
 629
 630 static void lambda_data_init(lambda_data_t* l, lambda_vec_t* native_lambda, double temp)
 631 {
 632     l->lambda = native_lambda;
 633     l->temp   = temp;
 634
 635     l->next = nullptr;
 636     l->prev = nullptr;
 637
 638     l->sc = &(l->sc_head);
 639
 640     sample_coll_init(l->sc, native_lambda, nullptr, 0.);
 641     l->sc->next = l->sc;
 642     l->sc->prev = l->sc;
 643 }
 644
 645 static void barres_init(barres_t* br)
 646 {
 647     br->dg            = 0;
 648     br->dg_err        = 0;
 649     br->sa            = 0;
 650     br->sa_err        = 0;
 651     br->sb            = 0;
 652     br->sb_err        = 0;
 653     br->dg_stddev     = 0;
 654     br->dg_stddev_err = 0;
 655
 656     br->a = nullptr;
 657     br->b = nullptr;
 658 }
 659
 660
 661 /* calculate the total number of samples in a sample collection */
 662 static void sample_coll_calc_ntot(sample_coll_t* sc)
 663 {
 664     int i;
 665
 666     sc->ntot = 0;
 667     for (i = 0; i < sc->nsamples; i++)
 668     {
 669         if (sc->r[i].use)
 670         {
 671             if (sc->s[i]->hist)
 672             {
 673                 sc->ntot += sc->s[i]->ntot;
 674             }
 675             else
 676             {
 677                 sc->ntot += sc->r[i].end - sc->r[i].start;
 678             }
 679         }
 680     }
 681 }
 682
 683
 684 /* find the barsamples_t associated with a lambda that corresponds to
 685    a specific foreign lambda */
 686 static sample_coll_t* lambda_data_find_sample_coll(lambda_data_t* l, lambda_vec_t* foreign_lambda)
 687 {
 688     sample_coll_t* sc = l->sc->next;
 689
 690     while (sc != l->sc)
 691     {
 692         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 693         {
 694             return sc;
 695         }
 696         sc = sc->next;
 697     }
 698
 699     return nullptr;
 700 }
 701
 702 /* insert li into an ordered list of lambda_colls */
 703 static void lambda_data_insert_sample_coll(lambda_data_t* l, sample_coll_t* sc)
 704 {
 705     sample_coll_t* scn = l->sc->next;
 706     while ((scn != l->sc))
 707     {
 708         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 709         {
 710             break;
 711         }
 712         scn = scn->next;
 713     }
 714     /* now insert it before the found scn */
 715     sc->next        = scn;
 716     sc->prev        = scn->prev;
 717     scn->prev->next = sc;
 718     scn->prev       = sc;
 719 }
 720
 721 /* insert li into an ordered list of lambdas */
 722 static void lambda_data_insert_lambda(lambda_data_t* head, lambda_data_t* li)
 723 {
 724     lambda_data_t* lc = head->next;
 725     while (lc != head)
 726     {
 727         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 728         {
 729             break;
 730         }
 731         lc = lc->next;
 732     }
 733     /* now insert ourselves before the found lc */
 734     li->next       = lc;
 735     li->prev       = lc->prev;
 736     lc->prev->next = li;
 737     lc->prev       = li;
 738 }
 739
 740 /* insert a sample and a sample_range into a sample_coll. The
 741     samples are stored as a pointer, the range is copied. */
 742 static void sample_coll_insert_sample(sample_coll_t* sc, samples_t* s, sample_range_t* r)
 743 {
 744     /* first check if it belongs here */
 745     GMX_ASSERT(sc->next->s, "Next not properly initialized!");
 746     if (sc->temp != s->temp)
 747     {
 748         gmx_fatal(FARGS,
 749                   "Temperatures in files %s and %s are not the same!",
 750                   s->filename,
 751                   sc->next->s[0]->filename);
 752     }
 753     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 754     {
 755         gmx_fatal(FARGS,
 756                   "Native lambda in files %s and %s are not the same (and they should be)!",
 757                   s->filename,
 758                   sc->next->s[0]->filename);
 759     }
 760     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 761     {
 762         gmx_fatal(FARGS,
 763                   "Foreign lambda in files %s and %s are not the same (and they should be)!",
 764                   s->filename,
 765                   sc->next->s[0]->filename);
 766     }
 767
 768     /* check if there's room */
 769     if ((sc->nsamples + 1) > sc->nsamples_alloc)
 770     {
 771         sc->nsamples_alloc = std::max(2 * sc->nsamples_alloc, 2);
 772         srenew(sc->s, sc->nsamples_alloc);
 773         srenew(sc->r, sc->nsamples_alloc);
 774     }
 775     sc->s[sc->nsamples] = s;
 776     sc->r[sc->nsamples] = *r;
 777     sc->nsamples++;
 778
 779     sample_coll_calc_ntot(sc);
 780 }
 781
 782 /* insert a sample into a lambda_list, creating the right sample_coll if
 783    neccesary */
 784 static void lambda_data_list_insert_sample(lambda_data_t* head, samples_t* s)
 785 {
 786     gmx_bool       found = FALSE;
 787     sample_coll_t* sc;
 788     sample_range_t r;
 789
 790     lambda_data_t* l = head->next;
 791
 792     /* first search for the right lambda_data_t */
 793     while (l != head)
 794     {
 795         if (lambda_vec_same(l->lambda, s->native_lambda))
 796         {
 797             found = TRUE;
 798             break;
 799         }
 800         l = l->next;
 801     }
 802
 803     if (!found)
 804     {
 805         snew(l, 1);                                     /* allocate a new one */
 806         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 807         lambda_data_insert_lambda(head, l);             /* add it to the list */
 808     }
 809
 810     /* now look for a sample collection */
 811     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 812     if (!sc)
 813     {
 814         snew(sc, 1); /* allocate a new one */
 815         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 816         lambda_data_insert_sample_coll(l, sc);
 817     }
 818
 819     /* now insert the samples into the sample coll */
 820     sample_range_init(&r, s);
 821     sample_coll_insert_sample(sc, s, &r);
 822 }
 823
 824
 825 /* make a histogram out of a sample collection */
 826 static void sample_coll_make_hist(sample_coll_t* sc, std::vector<int>* bin, double* dx, double* xmin, int nbin_default)
 827 {
 828     int      i, j, k;
 829     gmx_bool dx_set   = FALSE;
 830     gmx_bool xmin_set = FALSE;
 831
 832     gmx_bool xmax_set      = FALSE;
 833     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 834                                        limits of a histogram */
 835     double xmax = -1;
 836
 837     /* first determine dx and xmin; try the histograms */
 838     for (i = 0; i < sc->nsamples; i++)
 839     {
 840         if (sc->s[i]->hist)
 841         {
 842             hist_t* hist = sc->s[i]->hist;
 843             for (k = 0; k < hist->nhist; k++)
 844             {
 845                 double hdx      = hist->dx[k];
 846                 double xmax_now = (hist->x0[k] + hist->nbin[k]) * hdx;
 847
 848                 /* we use the biggest dx*/
 849                 if ((!dx_set) || hist->dx[0] > *dx)
 850                 {
 851                     dx_set = TRUE;
 852                     *dx    = hist->dx[0];
 853                 }
 854                 if ((!xmin_set) || (hist->x0[k] * hdx) < *xmin)
 855                 {
 856                     xmin_set = TRUE;
 857                     *xmin    = (hist->x0[k] * hdx);
 858                 }
 859
 860                 if ((!xmax_set) || (xmax_now > xmax && !xmax_set_hard))
 861                 {
 862                     xmax_set = TRUE;
 863                     xmax     = xmax_now;
 864                     if (hist->bin[k][hist->nbin[k] - 1] != 0)
 865                     {
 866                         xmax_set_hard = TRUE;
 867                     }
 868                 }
 869                 if (hist->bin[k][hist->nbin[k] - 1] != 0 && (xmax_now < xmax))
 870                 {
 871                     xmax_set_hard = TRUE;
 872                     xmax          = xmax_now;
 873                 }
 874             }
 875         }
 876     }
 877     /* and the delta us */
 878     for (i = 0; i < sc->nsamples; i++)
 879     {
 880         if (sc->s[i]->ndu > 0)
 881         {
 882             /* determine min and max */
 883             int    starti  = sc->r[i].start;
 884             int    endi    = sc->r[i].end;
 885             double du_xmin = sc->s[i]->du[starti];
 886             double du_xmax = sc->s[i]->du[starti];
 887             for (j = starti + 1; j < endi; j++)
 888             {
 889                 if (sc->s[i]->du[j] < du_xmin)
 890                 {
 891                     du_xmin = sc->s[i]->du[j];
 892                 }
 893                 if (sc->s[i]->du[j] > du_xmax)
 894                 {
 895                     du_xmax = sc->s[i]->du[j];
 896                 }
 897             }
 898
 899             /* and now change the limits */
 900             if ((!xmin_set) || (du_xmin < *xmin))
 901             {
 902                 xmin_set = TRUE;
 903                 *xmin    = du_xmin;
 904             }
 905             if ((!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard))
 906             {
 907                 xmax_set = TRUE;
 908                 xmax     = du_xmax;
 909             }
 910         }
 911     }
 912
 913     if (!xmax_set || !xmin_set)
 914     {
 915         bin->clear();
 916         return;
 917     }
 918
 919
 920     if (!dx_set)
 921     {
 922         bin->resize(nbin_default);
 923         *dx = (xmax - (*xmin)) / ((bin->size()) - 2); /* -2 because we want the last bin to
 924                                                    be 0, and we count from 0 */
 925     }
 926     else
 927     {
 928         bin->resize(static_cast<int>((xmax - (*xmin)) / (*dx)));
 929     }
 930
 931     /* reset the histogram */
 932     std::fill(bin->begin(), bin->end(), 0);
 933
 934     /* now add the actual data */
 935     for (i = 0; i < sc->nsamples; i++)
 936     {
 937         if (sc->s[i]->hist)
 938         {
 939             hist_t* hist = sc->s[i]->hist;
 940             for (k = 0; k < hist->nhist; k++)
 941             {
 942                 double hdx       = hist->dx[k];
 943                 double xmin_hist = hist->x0[k] * hdx;
 944                 for (j = 0; j < hist->nbin[k]; j++)
 945                 {
 946                     /* calculate the bin corresponding to the middle of the
 947                        original bin */
 948                     double x     = hdx * (j + 0.5) + xmin_hist;
 949                     int    binnr = static_cast<int>((x - (*xmin)) / (*dx));
 950
 951                     if (binnr >= gmx::ssize(*bin) || binnr < 0)
 952                     {
 953                         binnr = (bin->size()) - 1;
 954                     }
 955
 956                     (*bin)[binnr] += hist->bin[k][j];
 957                 }
 958             }
 959         }
 960         else
 961         {
 962             int starti = sc->r[i].start;
 963             int endi   = sc->r[i].end;
 964             for (j = starti; j < endi; j++)
 965             {
 966                 int binnr = static_cast<int>((sc->s[i]->du[j] - (*xmin)) / (*dx));
 967                 if (binnr >= gmx::ssize(*bin) || binnr < 0)
 968                 {
 969                     binnr = (bin->size()) - 1;
 970                 }
 971
 972                 (*bin)[binnr]++;
 973             }
 974         }
 975     }
 976 }
 977
 978 /* write a collection of histograms to a file */
 979 static void sim_data_histogram(sim_data_t* sd, const char* filename, int nbin_default, const gmx_output_env_t* oenv)
 980 {
 981     char           label_x[STRLEN];
 982     const char *   dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
 983     const char*    title   = "N(\\DeltaH)";
 984     const char*    label_y = "Samples";
 985     FILE*          fp;
 986     lambda_data_t* bl;
 987     int            nsets     = 0;
 988     char**         setnames  = nullptr;
 989     gmx_bool       first_set = FALSE;
 990     /* histogram data: */
 991     std::vector<int> hist;
 992     double           dx      = 0;
 993     double           minval  = 0;
 994     lambda_data_t*   bl_head = sd->lb;
 995
 996     printf("\nWriting histogram to %s\n", filename);
 997     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
 998
 999     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1000
1001     /* first get all the set names */
1002     bl = bl_head->next;
1003     /* iterate over all lambdas */
1004     while (bl != bl_head)
1005     {
1006         sample_coll_t* sc = bl->sc->next;
1007
1008         /* iterate over all samples */
1009         while (sc != bl->sc)
1010         {
1011             char buf[STRLEN], buf2[STRLEN];
1012
1013             nsets++;
1014             srenew(setnames, nsets);
1015             snew(setnames[nsets - 1], STRLEN);
1016             if (sc->foreign_lambda->dhdl < 0)
1017             {
1018                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1019                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1020                 sprintf(setnames[nsets - 1], "N(%s(%s=%s) | %s=%s)", deltag, lambda, buf2, lambda, buf);
1021             }
1022             else
1023             {
1024                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1025                 sprintf(setnames[nsets - 1], "N(%s | %s=%s)", dhdl, lambda, buf);
1026             }
1027             sc = sc->next;
1028         }
1029
1030         bl = bl->next;
1031     }
1032     xvgr_legend(fp, nsets, setnames, oenv);
1033
1034
1035     /* now make the histograms */
1036     bl = bl_head->next;
1037     /* iterate over all lambdas */
1038     while (bl != bl_head)
1039     {
1040         sample_coll_t* sc = bl->sc->next;
1041
1042         /* iterate over all samples */
1043         while (sc != bl->sc)
1044         {
1045             if (!first_set)
1046             {
1047                 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1048             }
1049
1050             sample_coll_make_hist(sc, &hist, &dx, &minval, nbin_default);
1051
1052             for (gmx::index i = 0; i < gmx::ssize(hist); i++)
1053             {
1054                 double xmin = i * dx + minval;
1055                 double xmax = (i + 1) * dx + minval;
1056
1057                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1058             }
1059
1060             first_set = FALSE;
1061             sc        = sc->next;
1062         }
1063
1064         bl = bl->next;
1065     }
1066
1067     xvgrclose(fp);
1068 }
1069
1070 static int snprint_lambda_vec(char* str, int sz, const char* label, lambda_vec_t* lambda)
1071 {
1072     int n = 0;
1073
1074     n += snprintf(str + n, sz - n, "lambda vector [%s]: ", label);
1075     if (lambda->index >= 0)
1076     {
1077         n += snprintf(str + n, sz - n, " init-lambda-state=%d", lambda->index);
1078     }
1079     if (lambda->dhdl >= 0)
1080     {
1081         n += snprintf(str + n, sz - n, " dhdl index=%d", lambda->dhdl);
1082     }
1083     else
1084     {
1085         int i;
1086         for (i = 0; i < lambda->lc->N; i++)
1087         {
1088             n += snprintf(str + n, sz - n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1089         }
1090     }
1091     return n;
1092 }
1093
1094 /* create a collection (array) of barres_t object given a ordered linked list
1095    of barlamda_t sample collections */
1096 static barres_t* barres_list_create(sim_data_t* sd, int* nres, gmx_bool use_dhdl)
1097 {
1098     lambda_data_t* bl;
1099     int            nlambda = 0;
1100     barres_t*      res;
1101     gmx_bool       dhdl    = FALSE;
1102     gmx_bool       first   = TRUE;
1103     lambda_data_t* bl_head = sd->lb;
1104
1105     /* first count the lambdas */
1106     bl = bl_head->next;
1107     while (bl != bl_head)
1108     {
1109         nlambda++;
1110         bl = bl->next;
1111     }
1112     snew(res, nlambda - 1);
1113
1114     /* next put the right samples in the res */
1115     *nres = 0;
1116     bl    = bl_head->next->next; /* we start with the second one. */
1117     while (bl != bl_head)
1118     {
1119         sample_coll_t *sc, *scprev;
1120         barres_t*      br = &(res[*nres]);
1121         /* there is always a previous one. we search for that as a foreign
1122            lambda: */
1123         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1124         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1125
1126         barres_init(br);
1127
1128         if (use_dhdl)
1129         {
1130             /* we use dhdl */
1131
1132             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1133             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1134
1135             if (first)
1136             {
1137                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H "
1138                        "values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1139                 dhdl = TRUE;
1140             }
1141             if (!dhdl)
1142             {
1143                 gmx_fatal(FARGS,
1144                           "Some dhdl files contain only one value (dH/dl), while others \ncontain "
1145                           "multiple values (dH/dl and/or Delta H), will not proceed \nbecause of "
1146                           "possible inconsistencies.\n");
1147             }
1148         }
1149         else if (!scprev && !sc)
1150         {
1151             char descX[STRLEN], descY[STRLEN];
1152             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1153             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1154
1155             gmx_fatal(FARGS,
1156                       "There is no path between the states X & Y below that is covered by foreign "
1157                       "lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl "
1158                       "by calculating the averages of dH/dl\nwith g_analyze and integrating "
1159                       "them.\nAlternatively, use the -extp option if (and only if) the "
1160                       "Hamiltonian\ndepends linearly on lambda, which is NOT normally the "
1161                       "case.\n\n%s\n%s\n",
1162                       descX,
1163                       descY);
1164         }
1165
1166         /* normal delta H */
1167         if (!scprev)
1168         {
1169             char descX[STRLEN], descY[STRLEN];
1170             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1171             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1172             gmx_fatal(FARGS,
1173                       "Could not find a set for foreign lambda (state X below)\nin the files for "
1174                       "main lambda (state Y below)\n\n%s\n%s\n",
1175                       descX,
1176                       descY);
1177         }
1178         if (!sc)
1179         {
1180             char descX[STRLEN], descY[STRLEN];
1181             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1182             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1183             gmx_fatal(FARGS,
1184                       "Could not find a set for foreign lambda (state X below)\nin the files for "
1185                       "main lambda (state Y below)\n\n%s\n%s\n",
1186                       descX,
1187                       descY);
1188         }
1189         br->a = scprev;
1190         br->b = sc;
1191
1192         first = FALSE;
1193         (*nres)++;
1194         bl = bl->next;
1195     }
1196     return res;
1197 }
1198
1199 /* estimate the maximum discretization error */
1200 static double barres_list_max_disc_err(barres_t* res, int nres)
1201 {
1202     int    i, j;
1203     double disc_err = 0.;
1204     double delta_lambda;
1205
1206     for (i = 0; i < nres; i++)
1207     {
1208         barres_t* br = &(res[i]);
1209
1210         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda, br->a->native_lambda);
1211
1212         for (j = 0; j < br->a->nsamples; j++)
1213         {
1214             if (br->a->s[j]->hist)
1215             {
1216                 double Wfac = 1.;
1217                 if (br->a->s[j]->derivative)
1218                 {
1219                     Wfac = delta_lambda;
1220                 }
1221
1222                 disc_err = std::max(disc_err, Wfac * br->a->s[j]->hist->dx[0]);
1223             }
1224         }
1225         for (j = 0; j < br->b->nsamples; j++)
1226         {
1227             if (br->b->s[j]->hist)
1228             {
1229                 double Wfac = 1.;
1230                 if (br->b->s[j]->derivative)
1231                 {
1232                     Wfac = delta_lambda;
1233                 }
1234                 disc_err = std::max(disc_err, Wfac * br->b->s[j]->hist->dx[0]);
1235             }
1236         }
1237     }
1238     return disc_err;
1239 }
1240
1241
1242 /* impose start and end times on a sample collection, updating sample_ranges */
1243 static void sample_coll_impose_times(sample_coll_t* sc, double begin_t, double end_t)
1244 {
1245     int i;
1246     for (i = 0; i < sc->nsamples; i++)
1247     {
1248         samples_t*      s = sc->s[i];
1249         sample_range_t* r = &(sc->r[i]);
1250         if (s->hist)
1251         {
1252             double end_time = s->hist->delta_time * s->hist->sum + s->hist->start_time;
1253             if (s->hist->start_time < begin_t || end_time > end_t)
1254             {
1255                 r->use = FALSE;
1256             }
1257         }
1258         else
1259         {
1260             if (!s->t)
1261             {
1262                 double end_time;
1263                 if (s->start_time < begin_t)
1264                 {
1265                     r->start = static_cast<int>((begin_t - s->start_time) / s->delta_time);
1266                 }
1267                 end_time = s->delta_time * s->ndu + s->start_time;
1268                 if (end_time > end_t)
1269                 {
1270                     r->end = static_cast<int>((end_t - s->start_time) / s->delta_time);
1271                 }
1272             }
1273             else
1274             {
1275                 int j;
1276                 for (j = 0; j < s->ndu; j++)
1277                 {
1278                     if (s->t[j] < begin_t)
1279                     {
1280                         r->start = j;
1281                     }
1282
1283                     if (s->t[j] >= end_t)
1284                     {
1285                         r->end = j;
1286                         break;
1287                     }
1288                 }
1289             }
1290             if (r->start > r->end)
1291             {
1292                 r->use = FALSE;
1293             }
1294         }
1295     }
1296     sample_coll_calc_ntot(sc);
1297 }
1298
1299 static void sim_data_impose_times(sim_data_t* sd, double begin, double end)
1300 {
1301     double         first_t, last_t;
1302     double         begin_t, end_t;
1303     lambda_data_t* lc;
1304     lambda_data_t* head = sd->lb;
1305     int            j;
1306
1307     if (begin <= 0 && end < 0)
1308     {
1309         return;
1310     }
1311
1312     /* first determine the global start and end times */
1313     first_t = -1;
1314     last_t  = -1;
1315     lc      = head->next;
1316     while (lc != head)
1317     {
1318         sample_coll_t* sc = lc->sc->next;
1319         while (sc != lc->sc)
1320         {
1321             for (j = 0; j < sc->nsamples; j++)
1322             {
1323                 double start_t, end_t;
1324
1325                 start_t = sc->s[j]->start_time;
1326                 end_t   = sc->s[j]->start_time;
1327                 if (sc->s[j]->hist)
1328                 {
1329                     end_t += sc->s[j]->delta_time * sc->s[j]->hist->sum;
1330                 }
1331                 else
1332                 {
1333                     if (sc->s[j]->t)
1334                     {
1335                         end_t = sc->s[j]->t[sc->s[j]->ndu - 1];
1336                     }
1337                     else
1338                     {
1339                         end_t += sc->s[j]->delta_time * sc->s[j]->ndu;
1340                     }
1341                 }
1342
1343                 if (start_t < first_t || first_t < 0)
1344                 {
1345                     first_t = start_t;
1346                 }
1347                 if (end_t > last_t)
1348                 {
1349                     last_t = end_t;
1350                 }
1351             }
1352             sc = sc->next;
1353         }
1354         lc = lc->next;
1355     }
1356
1357     /* calculate the actual times */
1358     if (begin > 0)
1359     {
1360         begin_t = begin;
1361     }
1362     else
1363     {
1364         begin_t = first_t;
1365     }
1366
1367     if (end > 0)
1368     {
1369         end_t = end;
1370     }
1371     else
1372     {
1373         end_t = last_t;
1374     }
1375     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1376
1377     if (begin_t > end_t)
1378     {
1379         return;
1380     }
1381     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1382
1383     /* then impose them */
1384     lc = head->next;
1385     while (lc != head)
1386     {
1387         sample_coll_t* sc = lc->sc->next;
1388         while (sc != lc->sc)
1389         {
1390             sample_coll_impose_times(sc, begin_t, end_t);
1391             sc = sc->next;
1392         }
1393         lc = lc->next;
1394     }
1395 }
1396
1397
1398 /* create subsample i out of ni from an existing sample_coll */
1399 static gmx_bool sample_coll_create_subsample(sample_coll_t* sc, sample_coll_t* sc_orig, int i, int ni)
1400 {
1401     int j;
1402
1403     int64_t ntot_start;
1404     int64_t ntot_end;
1405     int64_t ntot_so_far;
1406
1407     *sc = *sc_orig; /* just copy all fields */
1408
1409     /* allocate proprietary memory */
1410     snew(sc->s, sc_orig->nsamples);
1411     snew(sc->r, sc_orig->nsamples);
1412
1413     /* copy the samples */
1414     for (j = 0; j < sc_orig->nsamples; j++)
1415     {
1416         sc->s[j] = sc_orig->s[j];
1417         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1418     }
1419
1420     /* now fix start and end fields */
1421     /* the casts avoid possible overflows */
1422     ntot_start = static_cast<int64_t>(sc_orig->ntot * static_cast<double>(i) / static_cast<double>(ni));
1423     ntot_end = static_cast<int64_t>(sc_orig->ntot * static_cast<double>(i + 1) / static_cast<double>(ni));
1424     ntot_so_far = 0;
1425     for (j = 0; j < sc->nsamples; j++)
1426     {
1427         int64_t ntot_add;
1428         int64_t new_start, new_end;
1429
1430         if (sc->r[j].use)
1431         {
1432             if (sc->s[j]->hist)
1433             {
1434                 ntot_add = sc->s[j]->hist->sum;
1435             }
1436             else
1437             {
1438                 ntot_add = sc->r[j].end - sc->r[j].start;
1439             }
1440         }
1441         else
1442         {
1443             ntot_add = 0;
1444         }
1445
1446         if (!sc->s[j]->hist)
1447         {
1448             if (ntot_so_far < ntot_start)
1449             {
1450                 /* adjust starting point */
1451                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1452             }
1453             else
1454             {
1455                 new_start = sc->r[j].start;
1456             }
1457             /* adjust end point */
1458             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1459             if (new_end > sc->r[j].end)
1460             {
1461                 new_end = sc->r[j].end;
1462             }
1463
1464             /* check if we're in range at all */
1465             if ((new_end < new_start) || (new_start > sc->r[j].end))
1466             {
1467                 new_start = 0;
1468                 new_end   = 0;
1469             }
1470             /* and write the new range */
1471             GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(),
1472                                "Value of 'new_start' too large for int converstion");
1473             GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(),
1474                                "Value of 'new_end' too large for int converstion");
1475             sc->r[j].start = static_cast<int>(new_start);
1476             sc->r[j].end   = static_cast<int>(new_end);
1477         }
1478         else
1479         {
1480             if (sc->r[j].use)
1481             {
1482                 double overlap;
1483                 double ntot_start_norm, ntot_end_norm;
1484                 /* calculate the amount of overlap of the
1485                    desired range (ntot_start -- ntot_end) onto
1486                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1487
1488                 /* first calculate normalized bounds
1489                    (where 0 is the start of the hist range, and 1 the end) */
1490                 ntot_start_norm = (ntot_start - ntot_so_far) / static_cast<double>(ntot_add);
1491                 ntot_end_norm   = (ntot_end - ntot_so_far) / static_cast<double>(ntot_add);
1492
1493                 /* now fix the boundaries */
1494                 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1495                 ntot_end_norm   = std::max(0.0, std::min(1.0, ntot_end_norm));
1496
1497                 /* and calculate the overlap */
1498                 overlap = ntot_end_norm - ntot_start_norm;
1499
1500                 if (overlap > 0.95) /* we allow for 5% slack */
1501                 {
1502                     sc->r[j].use = TRUE;
1503                 }
1504                 else if (overlap < 0.05)
1505                 {
1506                     sc->r[j].use = FALSE;
1507                 }
1508                 else
1509                 {
1510                     return FALSE;
1511                 }
1512             }
1513         }
1514         ntot_so_far += ntot_add;
1515     }
1516     sample_coll_calc_ntot(sc);
1517
1518     return TRUE;
1519 }
1520
1521 /* calculate minimum and maximum work values in sample collection */
1522 static void sample_coll_min_max(sample_coll_t* sc, double Wfac, double* Wmin, double* Wmax)
1523 {
1524     int i, j;
1525
1526     *Wmin = std::numeric_limits<float>::max();
1527     *Wmax = -std::numeric_limits<float>::max();
1528
1529     for (i = 0; i < sc->nsamples; i++)
1530     {
1531         samples_t*      s = sc->s[i];
1532         sample_range_t* r = &(sc->r[i]);
1533         if (r->use)
1534         {
1535             if (!s->hist)
1536             {
1537                 for (j = r->start; j < r->end; j++)
1538                 {
1539                     *Wmin = std::min(*Wmin, s->du[j] * Wfac);
1540                     *Wmax = std::max(*Wmax, s->du[j] * Wfac);
1541                 }
1542             }
1543             else
1544             {
1545                 int    hd = 0; /* determine the histogram direction: */
1546                 double dx;
1547                 if ((s->hist->nhist > 1) && (Wfac < 0))
1548                 {
1549                     hd = 1;
1550                 }
1551                 dx = s->hist->dx[hd];
1552
1553                 for (j = s->hist->nbin[hd] - 1; j >= 0; j--)
1554                 {
1555                     *Wmin = std::min(*Wmin, Wfac * (s->hist->x0[hd]) * dx);
1556                     *Wmax = std::max(*Wmax, Wfac * (s->hist->x0[hd]) * dx);
1557                     /* look for the highest value bin with values */
1558                     if (s->hist->bin[hd][j] > 0)
1559                     {
1560                         *Wmin = std::min(*Wmin, Wfac * (j + s->hist->x0[hd] + 1) * dx);
1561                         *Wmax = std::max(*Wmax, Wfac * (j + s->hist->x0[hd] + 1) * dx);
1562                         break;
1563                     }
1564                 }
1565             }
1566         }
1567     }
1568 }
1569
1570 /* Initialize a sim_data structure */
1571 static void sim_data_init(sim_data_t* sd)
1572 {
1573     /* make linked list */
1574     sd->lb       = &(sd->lb_head);
1575     sd->lb->next = sd->lb;
1576     sd->lb->prev = sd->lb;
1577
1578     lambda_components_init(&(sd->lc));
1579 }
1580
1581
1582 static double calc_bar_sum(int n, const double* W, double Wfac, double sbMmDG)
1583 {
1584     int    i;
1585     double sum;
1586
1587     sum = 0;
1588
1589     for (i = 0; i < n; i++)
1590     {
1591         sum += 1. / (1. + std::exp(Wfac * W[i] + sbMmDG));
1592     }
1593
1594     return sum;
1595 }
1596
1597 /* calculate the BAR average given a histogram
1598
1599     if type== 0, calculate the best estimate for the average,
1600     if type==-1, calculate the minimum possible value given the histogram
1601     if type== 1, calculate the maximum possible value given the histogram */
1602 static double calc_bar_sum_hist(const hist_t* hist, double Wfac, double sbMmDG, int type)
1603 {
1604     double sum = 0.;
1605     int    i;
1606     int    maxbin;
1607     /* normalization factor multiplied with bin width and
1608        number of samples (we normalize through M): */
1609     double normdx = 1.;
1610     int    hd     = 0; /* determine the histogram direction: */
1611     double dx;
1612
1613     if ((hist->nhist > 1) && (Wfac < 0))
1614     {
1615         hd = 1;
1616     }
1617     dx     = hist->dx[hd];
1618     maxbin = hist->nbin[hd] - 1;
1619     if (type == 1)
1620     {
1621         maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1622     }
1623
1624     for (i = 0; i < maxbin; i++)
1625     {
1626         double x    = Wfac * ((i + hist->x0[hd]) + 0.5) * dx; /* bin middle */
1627         double pxdx = hist->bin[0][i] * normdx;               /* p(x)dx */
1628
1629         sum += pxdx / (1. + std::exp(x + sbMmDG));
1630     }
1631
1632     return sum;
1633 }
1634
1635 static double calc_bar_lowlevel(sample_coll_t* ca, sample_coll_t* cb, double temp, double tol, int type)
1636 {
1637     double kT, beta, M;
1638     int    i;
1639     double Wfac1, Wfac2, Wmin, Wmax;
1640     double DG0, DG1, DG2, dDG1;
1641     double n1, n2; /* numbers of samples as doubles */
1642
1643     kT   = BOLTZ * temp;
1644     beta = 1 / kT;
1645
1646     /* count the numbers of samples */
1647     n1 = ca->ntot;
1648     n2 = cb->ntot;
1649
1650     M = std::log(n1 / n2);
1651
1652     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1653     if (ca->foreign_lambda->dhdl < 0)
1654     {
1655         /* this is the case when the delta U were calculated directly
1656            (i.e. we're not scaling dhdl) */
1657         Wfac1 = beta;
1658         Wfac2 = beta;
1659     }
1660     else
1661     {
1662         /* we're using dhdl, so delta_lambda needs to be a
1663            multiplication factor.  */
1664         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1665         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda, ca->native_lambda);
1666         if (cb->native_lambda->lc->N > 1)
1667         {
1668             gmx_fatal(FARGS, "Can't (yet) do multi-component dhdl interpolation");
1669         }
1670
1671         Wfac1 = beta * delta_lambda;
1672         Wfac2 = -beta * delta_lambda;
1673     }
1674
1675     if (beta < 1)
1676     {
1677         /* We print the output both in kT and kJ/mol.
1678          * Here we determine DG in kT, so when beta < 1
1679          * the precision has to be increased.
1680          */
1681         tol *= beta;
1682     }
1683
1684     /* Calculate minimum and maximum work to give an initial estimate of
1685      * delta G  as their average.
1686      */
1687     {
1688         double Wmin1, Wmin2, Wmax1, Wmax2;
1689         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1690         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1691
1692         Wmin = std::min(Wmin1, Wmin2);
1693         Wmax = std::max(Wmax1, Wmax2);
1694     }
1695
1696     DG0 = Wmin;
1697     DG2 = Wmax;
1698
1699     if (debug)
1700     {
1701         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1702     }
1703     /* We approximate by bisection: given our initial estimates
1704        we keep checking whether the halfway point is greater or
1705        smaller than what we get out of the BAR averages.
1706
1707        For the comparison we can use twice the tolerance. */
1708     while (DG2 - DG0 > 2 * tol)
1709     {
1710         DG1 = 0.5 * (DG0 + DG2);
1711
1712         /* calculate the BAR averages */
1713         dDG1 = 0.;
1714
1715         for (i = 0; i < ca->nsamples; i++)
1716         {
1717             samples_t*      s = ca->s[i];
1718             sample_range_t* r = &(ca->r[i]);
1719             if (r->use)
1720             {
1721                 if (s->hist)
1722                 {
1723                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M - DG1), type);
1724                 }
1725                 else
1726                 {
1727                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start, Wfac1, (M - DG1));
1728                 }
1729             }
1730         }
1731         for (i = 0; i < cb->nsamples; i++)
1732         {
1733             samples_t*      s = cb->s[i];
1734             sample_range_t* r = &(cb->r[i]);
1735             if (r->use)
1736             {
1737                 if (s->hist)
1738                 {
1739                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M - DG1), type);
1740                 }
1741                 else
1742                 {
1743                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start, Wfac2, -(M - DG1));
1744                 }
1745             }
1746         }
1747
1748         if (dDG1 < 0)
1749         {
1750             DG0 = DG1;
1751         }
1752         else
1753         {
1754             DG2 = DG1;
1755         }
1756         if (debug)
1757         {
1758             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1759         }
1760     }
1761
1762     return 0.5 * (DG0 + DG2);
1763 }
1764
1765 static void calc_rel_entropy(sample_coll_t* ca, sample_coll_t* cb, double temp, double dg, double* sa, double* sb)
1766 {
1767     int    i, j;
1768     double W_ab = 0.;
1769     double W_ba = 0.;
1770     double kT, beta;
1771     double Wfac1, Wfac2;
1772     double n1, n2;
1773
1774     kT   = BOLTZ * temp;
1775     beta = 1 / kT;
1776
1777     /* count the numbers of samples */
1778     n1 = ca->ntot;
1779     n2 = cb->ntot;
1780
1781     /* to ensure the work values are the same as during the delta_G */
1782     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1783     if (ca->foreign_lambda->dhdl < 0)
1784     {
1785         /* this is the case when the delta U were calculated directly
1786            (i.e. we're not scaling dhdl) */
1787         Wfac1 = beta;
1788         Wfac2 = beta;
1789     }
1790     else
1791     {
1792         /* we're using dhdl, so delta_lambda needs to be a
1793            multiplication factor.  */
1794         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda, ca->native_lambda);
1795         Wfac1               = beta * delta_lambda;
1796         Wfac2               = -beta * delta_lambda;
1797     }
1798
1799     /* first calculate the average work in both directions */
1800     for (i = 0; i < ca->nsamples; i++)
1801     {
1802         samples_t*      s = ca->s[i];
1803         sample_range_t* r = &(ca->r[i]);
1804         if (r->use)
1805         {
1806             if (!s->hist)
1807             {
1808                 for (j = r->start; j < r->end; j++)
1809                 {
1810                     W_ab += Wfac1 * s->du[j];
1811                 }
1812             }
1813             else
1814             {
1815                 /* normalization factor multiplied with bin width and
1816                    number of samples (we normalize through M): */
1817                 double normdx = 1.;
1818                 double dx;
1819                 int    hd = 0; /* histogram direction */
1820                 if ((s->hist->nhist > 1) && (Wfac1 < 0))
1821                 {
1822                     hd = 1;
1823                 }
1824                 dx = s->hist->dx[hd];
1825
1826                 for (j = 0; j < s->hist->nbin[0]; j++)
1827                 {
1828                     double x    = Wfac1 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1829                     double pxdx = s->hist->bin[0][j] * normdx;               /* p(x)dx */
1830                     W_ab += pxdx * x;
1831                 }
1832             }
1833         }
1834     }
1835     W_ab /= n1;
1836
1837     for (i = 0; i < cb->nsamples; i++)
1838     {
1839         samples_t*      s = cb->s[i];
1840         sample_range_t* r = &(cb->r[i]);
1841         if (r->use)
1842         {
1843             if (!s->hist)
1844             {
1845                 for (j = r->start; j < r->end; j++)
1846                 {
1847                     W_ba += Wfac1 * s->du[j];
1848                 }
1849             }
1850             else
1851             {
1852                 /* normalization factor multiplied with bin width and
1853                    number of samples (we normalize through M): */
1854                 double normdx = 1.;
1855                 double dx;
1856                 int    hd = 0; /* histogram direction */
1857                 if ((s->hist->nhist > 1) && (Wfac2 < 0))
1858                 {
1859                     hd = 1;
1860                 }
1861                 dx = s->hist->dx[hd];
1862
1863                 for (j = 0; j < s->hist->nbin[0]; j++)
1864                 {
1865                     double x    = Wfac1 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1866                     double pxdx = s->hist->bin[0][j] * normdx;               /* p(x)dx */
1867                     W_ba += pxdx * x;
1868                 }
1869             }
1870         }
1871     }
1872     W_ba /= n2;
1873
1874     /* then calculate the relative entropies */
1875     *sa = (W_ab - dg);
1876     *sb = (W_ba + dg);
1877 }
1878
1879 static void calc_dg_stddev(sample_coll_t* ca, sample_coll_t* cb, double temp, double dg, double* stddev)
1880 {
1881     int    i, j;
1882     double M;
1883     double sigmafact = 0.;
1884     double kT, beta;
1885     double Wfac1, Wfac2;
1886     double n1, n2;
1887
1888     kT   = BOLTZ * temp;
1889     beta = 1 / kT;
1890
1891     /* count the numbers of samples */
1892     n1 = ca->ntot;
1893     n2 = cb->ntot;
1894
1895     /* to ensure the work values are the same as during the delta_G */
1896     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1897     if (ca->foreign_lambda->dhdl < 0)
1898     {
1899         /* this is the case when the delta U were calculated directly
1900            (i.e. we're not scaling dhdl) */
1901         Wfac1 = beta;
1902         Wfac2 = beta;
1903     }
1904     else
1905     {
1906         /* we're using dhdl, so delta_lambda needs to be a
1907            multiplication factor.  */
1908         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda, ca->native_lambda);
1909         Wfac1               = beta * delta_lambda;
1910         Wfac2               = -beta * delta_lambda;
1911     }
1912
1913     M = std::log(n1 / n2);
1914
1915
1916     /* calculate average in both directions */
1917     for (i = 0; i < ca->nsamples; i++)
1918     {
1919         samples_t*      s = ca->s[i];
1920         sample_range_t* r = &(ca->r[i]);
1921         if (r->use)
1922         {
1923             if (!s->hist)
1924             {
1925                 for (j = r->start; j < r->end; j++)
1926                 {
1927                     sigmafact += 1. / (2. + 2. * std::cosh((M + Wfac1 * s->du[j] - dg)));
1928                 }
1929             }
1930             else
1931             {
1932                 /* normalization factor multiplied with bin width and
1933                    number of samples (we normalize through M): */
1934                 double normdx = 1.;
1935                 double dx;
1936                 int    hd = 0; /* histogram direction */
1937                 if ((s->hist->nhist > 1) && (Wfac1 < 0))
1938                 {
1939                     hd = 1;
1940                 }
1941                 dx = s->hist->dx[hd];
1942
1943                 for (j = 0; j < s->hist->nbin[0]; j++)
1944                 {
1945                     double x    = Wfac1 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1946                     double pxdx = s->hist->bin[0][j] * normdx;               /* p(x)dx */
1947
1948                     sigmafact += pxdx / (2. + 2. * std::cosh((M + x - dg)));
1949                 }
1950             }
1951         }
1952     }
1953     for (i = 0; i < cb->nsamples; i++)
1954     {
1955         samples_t*      s = cb->s[i];
1956         sample_range_t* r = &(cb->r[i]);
1957         if (r->use)
1958         {
1959             if (!s->hist)
1960             {
1961                 for (j = r->start; j < r->end; j++)
1962                 {
1963                     sigmafact += 1. / (2. + 2. * std::cosh((M - Wfac2 * s->du[j] - dg)));
1964                 }
1965             }
1966             else
1967             {
1968                 /* normalization factor multiplied with bin width and
1969                    number of samples (we normalize through M): */
1970                 double normdx = 1.;
1971                 double dx;
1972                 int    hd = 0; /* histogram direction */
1973                 if ((s->hist->nhist > 1) && (Wfac2 < 0))
1974                 {
1975                     hd = 1;
1976                 }
1977                 dx = s->hist->dx[hd];
1978
1979                 for (j = 0; j < s->hist->nbin[0]; j++)
1980                 {
1981                     double x    = Wfac2 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1982                     double pxdx = s->hist->bin[0][j] * normdx;               /* p(x)dx */
1983
1984                     sigmafact += pxdx / (2. + 2. * std::cosh((M - x - dg)));
1985                 }
1986             }
1987         }
1988     }
1989
1990     sigmafact /= (n1 + n2);
1991
1992
1993     /* Eq. 10 from
1994        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1995     *stddev = std::sqrt(((1.0 / sigmafact) - ((n1 + n2) / n1 + (n1 + n2) / n2)));
1996 }
1997
1998
1999 static void calc_bar(barres_t* br, double tol, int npee_min, int npee_max, gmx_bool* bEE, double* partsum)
2000 {
2001     int    npee, p;
2002     double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2003                                                       for calculated quantities */
2004     double   temp = br->a->temp;
2005     int      i;
2006     double   dg_min, dg_max;
2007     gmx_bool have_hist = FALSE;
2008
2009     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2010
2011     br->dg_disc_err      = 0.;
2012     br->dg_histrange_err = 0.;
2013
2014     /* check if there are histograms */
2015     for (i = 0; i < br->a->nsamples; i++)
2016     {
2017         if (br->a->r[i].use && br->a->s[i]->hist)
2018         {
2019             have_hist = TRUE;
2020             break;
2021         }
2022     }
2023     if (!have_hist)
2024     {
2025         for (i = 0; i < br->b->nsamples; i++)
2026         {
2027             if (br->b->r[i].use && br->b->s[i]->hist)
2028             {
2029                 have_hist = TRUE;
2030                 break;
2031             }
2032         }
2033     }
2034
2035     /* calculate histogram-specific errors */
2036     if (have_hist)
2037     {
2038         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2039         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2040
2041         if (std::abs(dg_max - dg_min) > GMX_REAL_EPS * 10)
2042         {
2043             /* the histogram range  error is the biggest of the differences
2044                between the best estimate and the extremes */
2045             br->dg_histrange_err = std::abs(dg_max - dg_min);
2046         }
2047         br->dg_disc_err = 0.;
2048         for (i = 0; i < br->a->nsamples; i++)
2049         {
2050             if (br->a->s[i]->hist)
2051             {
2052                 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2053             }
2054         }
2055         for (i = 0; i < br->b->nsamples; i++)
2056         {
2057             if (br->b->s[i]->hist)
2058             {
2059                 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2060             }
2061         }
2062     }
2063     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2064
2065     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev));
2066
2067     dg_sig2     = 0;
2068     sa_sig2     = 0;
2069     sb_sig2     = 0;
2070     stddev_sig2 = 0;
2071
2072     *bEE = TRUE;
2073     {
2074         sample_coll_t ca, cb;
2075
2076         /* initialize the samples */
2077         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda, br->a->temp);
2078         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda, br->b->temp);
2079
2080         for (npee = npee_min; npee <= npee_max; npee++)
2081         {
2082             double dgs      = 0;
2083             double dgs2     = 0;
2084             double dsa      = 0;
2085             double dsb      = 0;
2086             double dsa2     = 0;
2087             double dsb2     = 0;
2088             double dstddev  = 0;
2089             double dstddev2 = 0;
2090
2091
2092             for (p = 0; p < npee; p++)
2093             {
2094                 double   dgp;
2095                 double   stddevc;
2096                 double   sac, sbc;
2097                 gmx_bool cac, cbc;
2098
2099                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2100                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2101
2102                 if (!cac || !cbc)
2103                 {
2104                     printf("WARNING: histogram number incompatible with block number for "
2105                            "averaging: can't do error estimate\n");
2106                     *bEE = FALSE;
2107                     if (cac)
2108                     {
2109                         sample_coll_destroy(&ca);
2110                     }
2111                     if (cbc)
2112                     {
2113                         sample_coll_destroy(&cb);
2114                     }
2115                     return;
2116                 }
2117
2118                 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2119                 dgs += dgp;
2120                 dgs2 += dgp * dgp;
2121
2122                 partsum[npee * (npee_max + 1) + p] += dgp;
2123
2124                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2125                 dsa += sac;
2126                 dsa2 += sac * sac;
2127                 dsb += sbc;
2128                 dsb2 += sbc * sbc;
2129                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc);
2130
2131                 dstddev += stddevc;
2132                 dstddev2 += stddevc * stddevc;
2133
2134                 sample_coll_destroy(&ca);
2135                 sample_coll_destroy(&cb);
2136             }
2137             dgs /= npee;
2138             dgs2 /= npee;
2139             dg_sig2 += (dgs2 - dgs * dgs) / (npee - 1);
2140
2141             dsa /= npee;
2142             dsa2 /= npee;
2143             dsb /= npee;
2144             dsb2 /= npee;
2145             sa_sig2 += (dsa2 - dsa * dsa) / (npee - 1);
2146             sb_sig2 += (dsb2 - dsb * dsb) / (npee - 1);
2147
2148             dstddev /= npee;
2149             dstddev2 /= npee;
2150             stddev_sig2 += (dstddev2 - dstddev * dstddev) / (npee - 1);
2151         }
2152         br->dg_err        = std::sqrt(dg_sig2 / (npee_max - npee_min + 1));
2153         br->sa_err        = std::sqrt(sa_sig2 / (npee_max - npee_min + 1));
2154         br->sb_err        = std::sqrt(sb_sig2 / (npee_max - npee_min + 1));
2155         br->dg_stddev_err = std::sqrt(stddev_sig2 / (npee_max - npee_min + 1));
2156     }
2157 }
2158
2159
2160 static double bar_err(int nbmin, int nbmax, const double* partsum)
2161 {
2162     int    nb, b;
2163     double svar, s, s2, dg;
2164
2165     svar = 0;
2166     for (nb = nbmin; nb <= nbmax; nb++)
2167     {
2168         s  = 0;
2169         s2 = 0;
2170         for (b = 0; b < nb; b++)
2171         {
2172             dg = partsum[nb * (nbmax + 1) + b];
2173             s += dg;
2174             s2 += dg * dg;
2175         }
2176         s /= nb;
2177         s2 /= nb;
2178         svar += (s2 - s * s) / (nb - 1);
2179     }
2180
2181     return std::sqrt(svar / (nbmax + 1 - nbmin));
2182 }
2183
2184
2185 /* Seek the end of an identifier (consecutive non-spaces), followed by
2186    an optional number of spaces or '='-signs. Returns a pointer to the
2187    first non-space value found after that. Returns NULL if the string
2188    ends before that.
2189  */
2190 static const char* find_value(const char* str)
2191 {
2192     gmx_bool name_end_found = FALSE;
2193
2194     /* if the string is a NULL pointer, return a NULL pointer. */
2195     if (str == nullptr)
2196     {
2197         return nullptr;
2198     }
2199     while (*str != '\0')
2200     {
2201         /* first find the end of the name */
2202         if (!name_end_found)
2203         {
2204             if (std::isspace(*str) || (*str == '='))
2205             {
2206                 name_end_found = TRUE;
2207             }
2208         }
2209         else
2210         {
2211             if (!(std::isspace(*str) || (*str == '=')))
2212             {
2213                 return str;
2214             }
2215         }
2216         str++;
2217     }
2218     return nullptr;
2219 }
2220
2221
2222 /* read a vector-notation description of a lambda vector */
2223 static gmx_bool read_lambda_compvec(const char*                str,
2224                                     lambda_vec_t*              lv,
2225                                     const lambda_components_t* lc_in,
2226                                     lambda_components_t*       lc_out,
2227                                     const char**               end,
2228                                     const char*                fn)
2229 {
2230     gmx_bool initialize_lc = FALSE;  /* whether to initialize the lambda
2231                                         components, or to check them */
2232     gmx_bool start_reached = FALSE;  /* whether the start of component names
2233                                         has been reached */
2234     gmx_bool    vector    = FALSE;   /* whether there are multiple components */
2235     int         n         = 0;       /* current component number */
2236     const char* val_start = nullptr; /* start of the component name, or NULL
2237                                         if not in a value */
2238     char*    strtod_end;
2239     gmx_bool OK = TRUE;
2240
2241     if (end)
2242     {
2243         *end = str;
2244     }
2245
2246
2247     if (lc_out && lc_out->N == 0)
2248     {
2249         initialize_lc = TRUE;
2250     }
2251
2252     if (lc_in == nullptr)
2253     {
2254         lc_in = lc_out;
2255     }
2256
2257     while (true)
2258     {
2259         if (!start_reached)
2260         {
2261             if (std::isalnum(*str))
2262             {
2263                 vector        = FALSE;
2264                 start_reached = TRUE;
2265                 val_start     = str;
2266             }
2267             else if (*str == '(')
2268             {
2269                 vector        = TRUE;
2270                 start_reached = TRUE;
2271             }
2272             else if (!std::isspace(*str))
2273             {
2274                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2275             }
2276         }
2277         else
2278         {
2279             if (val_start)
2280             {
2281                 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2282                 {
2283                     /* end of value */
2284                     if (lv == nullptr)
2285                     {
2286                         if (initialize_lc)
2287                         {
2288                             lambda_components_add(lc_out, val_start, (str - val_start));
2289                         }
2290                         else
2291                         {
2292                             if (!lambda_components_check(lc_out, n, val_start, (str - val_start)))
2293                             {
2294                                 return FALSE;
2295                             }
2296                         }
2297                     }
2298                     else
2299                     {
2300                         /* add a vector component to lv */
2301                         lv->val[n] = strtod(val_start, &strtod_end);
2302                         if (val_start == strtod_end)
2303                         {
2304                             gmx_fatal(FARGS, "Error reading lambda vector in %s", fn);
2305                         }
2306                     }
2307                     /* reset for the next identifier */
2308                     val_start = nullptr;
2309                     n++;
2310                     if (!vector)
2311                     {
2312                         return OK;
2313                     }
2314                 }
2315             }
2316             else if (std::isalnum(*str))
2317             {
2318                 val_start = str;
2319             }
2320             if (*str == ')')
2321             {
2322                 str++;
2323                 if (end)
2324                 {
2325                     *end = str;
2326                 }
2327                 if (!vector)
2328                 {
2329                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2330                 }
2331                 else
2332                 {
2333                     GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2334                     if (n == lc_in->N)
2335                     {
2336                         return OK;
2337                     }
2338                     else if (lv == nullptr)
2339                     {
2340                         return FALSE;
2341                     }
2342                     else
2343                     {
2344                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s", fn);
2345                         return FALSE;
2346                     }
2347                 }
2348             }
2349         }
2350         if (*str == '\0')
2351         {
2352             break;
2353         }
2354         str++;
2355         if (end)
2356         {
2357             *end = str;
2358         }
2359     }
2360     if (vector)
2361     {
2362         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2363         return FALSE;
2364     }
2365     return OK;
2366 }
2367
2368 /* read and check the component names from a string */
2369 static gmx_bool read_lambda_components(const char* str, lambda_components_t* lc, const char** end, const char* fn)
2370 {
2371     return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2372 }
2373
2374 /* read an initialized lambda vector from a string */
2375 static gmx_bool read_lambda_vector(const char* str, lambda_vec_t* lv, const char** end, const char* fn)
2376 {
2377     return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2378 }
2379
2380
2381 /* deduce lambda value from legend.
2382     fn = the file name
2383     legend = the legend string
2384     ba = the xvg data
2385     lam = the initialized lambda vector
2386    returns whether to use the data in this set.
2387  */
2388 static gmx_bool legend2lambda(const char* fn, const char* legend, lambda_vec_t* lam)
2389 {
2390     const char *ptr = nullptr, *ptr2 = nullptr;
2391     gmx_bool    ok    = FALSE;
2392     gmx_bool    bdhdl = FALSE;
2393     const char* tostr = " to ";
2394
2395     if (legend == nullptr)
2396     {
2397         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2398     }
2399
2400     /* look for the last 'to': */
2401     ptr2 = legend;
2402     do
2403     {
2404         ptr2 = std::strstr(ptr2, tostr);
2405         if (ptr2 != nullptr)
2406         {
2407             ptr = ptr2;
2408             ptr2++;
2409         }
2410     } while (ptr2 != nullptr && *ptr2 != '\0');
2411
2412     if (ptr)
2413     {
2414         ptr += std::strlen(tostr) - 1; /* and advance past that 'to' */
2415     }
2416     else
2417     {
2418         /* look for the = sign */
2419         ptr = std::strrchr(legend, '=');
2420         if (!ptr)
2421         {
2422             /* otherwise look for the last space */
2423             ptr = std::strrchr(legend, ' ');
2424         }
2425     }
2426
2427     if (std::strstr(legend, "dH"))
2428     {
2429         ok    = TRUE;
2430         bdhdl = TRUE;
2431     }
2432     else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2433     {
2434         ok    = TRUE;
2435         bdhdl = FALSE;
2436     }
2437     else /*if (std::strstr(legend, "pV"))*/
2438     {
2439         return FALSE;
2440     }
2441     if (!ptr)
2442     {
2443         ok = FALSE;
2444     }
2445
2446     if (!ok)
2447     {
2448         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2449     }
2450     if (!bdhdl)
2451     {
2452         ptr = find_value(ptr);
2453         if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2454         {
2455             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2456         }
2457     }
2458     else
2459     {
2460         int         dhdl_index;
2461         const char* end;
2462
2463         ptr = std::strrchr(legend, '=');
2464         end = ptr;
2465         if (ptr)
2466         {
2467             /* there must be a component name */
2468             ptr--;
2469             if (ptr < legend)
2470             {
2471                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2472             }
2473             /* now backtrack to the start of the identifier */
2474             while (isspace(*ptr))
2475             {
2476                 end = ptr;
2477                 ptr--;
2478                 if (ptr < legend)
2479                 {
2480                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2481                 }
2482             }
2483             while (!std::isspace(*ptr))
2484             {
2485                 ptr--;
2486                 if (ptr < legend)
2487                 {
2488                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2489                 }
2490             }
2491             ptr++;
2492             dhdl_index = lambda_components_find(lam->lc, ptr, (end - ptr));
2493             if (dhdl_index < 0)
2494             {
2495                 char buf[STRLEN];
2496                 std::strncpy(buf, ptr, (end - ptr));
2497                 buf[(end - ptr)] = '\0';
2498                 gmx_fatal(FARGS, "Did not find lambda component for '%s' in %s", buf, fn);
2499             }
2500         }
2501         else
2502         {
2503             if (lam->lc->N > 1)
2504             {
2505                 gmx_fatal(FARGS, "dhdl without component name with >1 lambda component in %s", fn);
2506             }
2507             dhdl_index = 0;
2508         }
2509         lam->dhdl = dhdl_index;
2510     }
2511     return TRUE;
2512 }
2513
2514 static gmx_bool subtitle2lambda(const char* subtitle, xvg_t* ba, const char* fn, lambda_components_t* lc)
2515 {
2516     gmx_bool    bFound;
2517     const char* ptr;
2518     char*       end;
2519     double      native_lambda;
2520
2521     bFound = FALSE;
2522
2523     /* first check for a state string */
2524     ptr = std::strstr(subtitle, "state");
2525     if (ptr)
2526     {
2527         int         index = -1;
2528         const char* val_end;
2529
2530         /* the new 4.6 style lambda vectors */
2531         ptr = find_value(ptr);
2532         if (ptr)
2533         {
2534             index = std::strtol(ptr, &end, 10);
2535             if (ptr == end)
2536             {
2537                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2538                 return FALSE;
2539             }
2540             ptr = end;
2541         }
2542         else
2543         {
2544             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2545             return FALSE;
2546         }
2547         /* now find the lambda vector component names */
2548         while (*ptr != '(' && !std::isalnum(*ptr))
2549         {
2550             ptr++;
2551             if (*ptr == '\0')
2552             {
2553                 gmx_fatal(FARGS, "Incomplete lambda vector component data in %s", fn);
2554                 return FALSE;
2555             }
2556         }
2557         val_end = ptr;
2558         if (!read_lambda_components(ptr, lc, &val_end, fn))
2559         {
2560             gmx_fatal(FARGS, "lambda vector components in %s don't match those previously read", fn);
2561         }
2562         ptr = find_value(val_end);
2563         if (!ptr)
2564         {
2565             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2566             return FALSE;
2567         }
2568         lambda_vec_init(&(ba->native_lambda), lc);
2569         if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2570         {
2571             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2572         }
2573         ba->native_lambda.index = index;
2574         bFound                  = TRUE;
2575     }
2576     else
2577     {
2578         /* compatibility mode: check for lambda in other ways. */
2579         /* plain text lambda string */
2580         ptr = std::strstr(subtitle, "lambda");
2581         if (ptr == nullptr)
2582         {
2583             /* xmgrace formatted lambda string */
2584             ptr = std::strstr(subtitle, "\\xl\\f{}");
2585         }
2586         if (ptr == nullptr)
2587         {
2588             /* xmgr formatted lambda string */
2589             ptr = std::strstr(subtitle, "\\8l\\4");
2590         }
2591         if (ptr != nullptr)
2592         {
2593             ptr = std::strstr(ptr, "=");
2594         }
2595         if (ptr != nullptr)
2596         {
2597             bFound = (sscanf(ptr + 1, "%lf", &(native_lambda)) == 1);
2598             /* add the lambda component name as an empty string */
2599             if (lc->N > 0)
2600             {
2601                 if (!lambda_components_check(lc, 0, "", 0))
2602                 {
2603                     gmx_fatal(FARGS,
2604                               "lambda vector components in %s don't match those previously read",
2605                               fn);
2606                 }
2607             }
2608             else
2609             {
2610                 lambda_components_add(lc, "", 0);
2611             }
2612             lambda_vec_init(&(ba->native_lambda), lc);
2613             ba->native_lambda.val[0] = native_lambda;
2614         }
2615     }
2616
2617     return bFound;
2618 }
2619
2620 static void read_bar_xvg_lowlevel(const char* fn, const real* temp, xvg_t* ba, lambda_components_t* lc)
2621 {
2622     int      i;
2623     char *   subtitle, **legend, *ptr;
2624     int      np;
2625     gmx_bool native_lambda_read = FALSE;
2626     char     buf[STRLEN];
2627
2628     xvg_init(ba);
2629
2630     ba->filename = fn;
2631
2632     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2633     if (!ba->y)
2634     {
2635         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2636     }
2637     /* Reorder the data */
2638     ba->t = ba->y[0];
2639     for (i = 1; i < ba->nset; i++)
2640     {
2641         ba->y[i - 1] = ba->y[i];
2642     }
2643     ba->nset--;
2644
2645     snew(ba->np, ba->nset);
2646     for (i = 0; i < ba->nset; i++)
2647     {
2648         ba->np[i] = np;
2649     }
2650
2651     ba->temp = -1;
2652     if (subtitle != nullptr)
2653     {
2654         /* try to extract temperature */
2655         ptr = std::strstr(subtitle, "T =");
2656         if (ptr != nullptr)
2657         {
2658             ptr += 3;
2659             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2660             {
2661                 if (ba->temp <= 0)
2662                 {
2663                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'", ba->temp, fn);
2664                 }
2665             }
2666         }
2667     }
2668     if (ba->temp < 0)
2669     {
2670         if (*temp <= 0)
2671         {
2672             gmx_fatal(FARGS,
2673                       "Did not find a temperature in the subtitle in file '%s', use the -temp "
2674                       "option of [TT]gmx bar[tt]",
2675                       fn);
2676         }
2677         ba->temp = *temp;
2678     }
2679
2680     /* Try to deduce lambda from the subtitle */
2681     if (subtitle)
2682     {
2683         if (subtitle2lambda(subtitle, ba, fn, lc))
2684         {
2685             native_lambda_read = TRUE;
2686         }
2687     }
2688     snew(ba->lambda, ba->nset);
2689     if (legend == nullptr)
2690     {
2691         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2692         if (ba->nset == 1)
2693         {
2694             ba->lambda[0] = ba->native_lambda;
2695         }
2696         else
2697         {
2698             gmx_fatal(FARGS,
2699                       "File %s contains multiple sets but no legends, can not determine the lambda "
2700                       "values",
2701                       fn);
2702         }
2703     }
2704     else
2705     {
2706         for (i = 0; i < ba->nset;)
2707         {
2708             /* Read lambda from the legend */
2709             lambda_vec_init(&(ba->lambda[i]), lc);
2710             lambda_vec_copy(&(ba->lambda[i]), &(ba->native_lambda));
2711             gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2712             if (use)
2713             {
2714                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2715                 i++;
2716             }
2717             else
2718             {
2719                 int j;
2720                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2721                 for (j = i + 1; j < ba->nset; j++)
2722                 {
2723                     ba->y[j - 1]  = ba->y[j];
2724                     legend[j - 1] = legend[j];
2725                 }
2726                 ba->nset--;
2727             }
2728         }
2729     }
2730
2731     if (!native_lambda_read)
2732     {
2733         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2734     }
2735
2736     if (legend != nullptr)
2737     {
2738         for (i = 0; i < ba->nset - 1; i++)
2739         {
2740             sfree(legend[i]);
2741         }
2742         sfree(legend);
2743     }
2744 }
2745
2746 static void read_bar_xvg(const char* fn, real* temp, sim_data_t* sd)
2747 {
2748     xvg_t*     barsim;
2749     samples_t* s;
2750     int        i;
2751
2752     snew(barsim, 1);
2753
2754     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2755
2756     if (barsim->nset < 1)
2757     {
2758         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2759     }
2760
2761     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0))
2762     {
2763         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2764     }
2765     *temp = barsim->temp;
2766
2767     /* now create a series of samples_t */
2768     snew(s, barsim->nset);
2769     for (i = 0; i < barsim->nset; i++)
2770     {
2771         samples_init(s + i,
2772                      &(barsim->native_lambda),
2773                      &(barsim->lambda[i]),
2774                      barsim->temp,
2775                      lambda_vec_same(&(barsim->native_lambda), &(barsim->lambda[i])),
2776                      fn);
2777         s[i].du  = barsim->y[i];
2778         s[i].ndu = barsim->np[i];
2779         s[i].t   = barsim->t;
2780
2781         lambda_data_list_insert_sample(sd->lb, s + i);
2782     }
2783     {
2784         char buf[STRLEN];
2785
2786         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2787         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2788                fn,
2789                s[0].t[0],
2790                s[0].t[s[0].ndu - 1],
2791                buf);
2792         for (i = 0; i < barsim->nset; i++)
2793         {
2794             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2795             printf("        %s (%d pts)\n", buf, s[i].ndu);
2796         }
2797     }
2798     printf("\n\n");
2799 }
2800
2801 static void read_edr_rawdh_block(samples_t**   smp,
2802                                  int*          ndu,
2803                                  t_enxblock*   blk,
2804                                  double        start_time,
2805                                  double        delta_time,
2806                                  lambda_vec_t* native_lambda,
2807                                  double        temp,
2808                                  double*       last_t,
2809                                  const char*   filename)
2810 {
2811     int           i, j;
2812     lambda_vec_t* foreign_lambda;
2813     int           type;
2814     samples_t*    s; /* convenience pointer */
2815     int           startj;
2816
2817     /* check the block types etc. */
2818     if ((blk->nsub < 3) || (blk->sub[0].type != xdr_datatype_int) || (blk->sub[1].type != xdr_datatype_double)
2819         || ((blk->sub[2].type != xdr_datatype_float) && (blk->sub[2].type != xdr_datatype_double))
2820         || (blk->sub[0].nr < 1) || (blk->sub[1].nr < 1))
2821     {
2822         gmx_fatal(FARGS, "Unexpected/corrupted block data in file %s around time %f.", filename, start_time);
2823     }
2824
2825     snew(foreign_lambda, 1);
2826     lambda_vec_init(foreign_lambda, native_lambda->lc);
2827     lambda_vec_copy(foreign_lambda, native_lambda);
2828     type = blk->sub[0].ival[0];
2829     if (type == dhbtDH)
2830     {
2831         for (i = 0; i < native_lambda->lc->N; i++)
2832         {
2833             foreign_lambda->val[i] = blk->sub[1].dval[i];
2834         }
2835     }
2836     else
2837     {
2838         if (blk->sub[0].nr > 1)
2839         {
2840             foreign_lambda->dhdl = blk->sub[0].ival[1];
2841         }
2842         else
2843         {
2844             foreign_lambda->dhdl = 0;
2845         }
2846     }
2847
2848     if (!*smp)
2849     {
2850         /* initialize the samples structure if it's empty. */
2851         snew(*smp, 1);
2852         samples_init(*smp, native_lambda, foreign_lambda, temp, type == dhbtDHDL, filename);
2853         (*smp)->start_time = start_time;
2854         (*smp)->delta_time = delta_time;
2855     }
2856
2857     /* set convenience pointer */
2858     s = *smp;
2859
2860     /* now double check */
2861     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda))
2862     {
2863         char buf[STRLEN], buf2[STRLEN];
2864         lambda_vec_print(foreign_lambda, buf, FALSE);
2865         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2866         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2867         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.", filename, start_time);
2868     }
2869
2870     /* make room for the data */
2871     if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2872     {
2873         s->ndu_alloc += (s->ndu_alloc < static_cast<size_t>(blk->sub[2].nr)) ? blk->sub[2].nr * 2
2874                                                                              : s->ndu_alloc;
2875         srenew(s->du_alloc, s->ndu_alloc);
2876         s->du = s->du_alloc;
2877     }
2878     startj = s->ndu;
2879     s->ndu += blk->sub[2].nr;
2880     s->ntot += blk->sub[2].nr;
2881     *ndu = blk->sub[2].nr;
2882
2883     /* and copy the data*/
2884     for (j = 0; j < blk->sub[2].nr; j++)
2885     {
2886         if (blk->sub[2].type == xdr_datatype_float)
2887         {
2888             s->du[startj + j] = blk->sub[2].fval[j];
2889         }
2890         else
2891         {
2892             s->du[startj + j] = blk->sub[2].dval[j];
2893         }
2894     }
2895     if (start_time + blk->sub[2].nr * delta_time > *last_t)
2896     {
2897         *last_t = start_time + blk->sub[2].nr * delta_time;
2898     }
2899 }
2900
2901 static samples_t* read_edr_hist_block(int*          nsamples,
2902                                       t_enxblock*   blk,
2903                                       double        start_time,
2904                                       double        delta_time,
2905                                       lambda_vec_t* native_lambda,
2906                                       double        temp,
2907                                       double*       last_t,
2908                                       const char*   filename)
2909 {
2910     int           i, j;
2911     samples_t*    s;
2912     int           nhist;
2913     lambda_vec_t* foreign_lambda;
2914     int           type;
2915     int           nbins[2];
2916
2917     /* check the block types etc. */
2918     if ((blk->nsub < 2) || (blk->sub[0].type != xdr_datatype_double)
2919         || (blk->sub[1].type != xdr_datatype_int64) || (blk->sub[0].nr < 2) || (blk->sub[1].nr < 2))
2920     {
2921         gmx_fatal(FARGS, "Unexpected/corrupted block data in file %s around time %f", filename, start_time);
2922     }
2923
2924     nhist = blk->nsub - 2;
2925     if (nhist == 0)
2926     {
2927         return nullptr;
2928     }
2929     if (nhist > 2)
2930     {
2931         gmx_fatal(FARGS, "Unexpected/corrupted block data in file %s around time %f", filename, start_time);
2932     }
2933
2934     snew(s, 1);
2935     *nsamples = 1;
2936
2937     snew(foreign_lambda, 1);
2938     lambda_vec_init(foreign_lambda, native_lambda->lc);
2939     lambda_vec_copy(foreign_lambda, native_lambda);
2940     type = static_cast<int>(blk->sub[1].lval[1]);
2941     if (type == dhbtDH)
2942     {
2943         double old_foreign_lambda;
2944
2945         old_foreign_lambda = blk->sub[0].dval[0];
2946         if (old_foreign_lambda >= 0)
2947         {
2948             foreign_lambda->val[0] = old_foreign_lambda;
2949             if (foreign_lambda->lc->N > 1)
2950             {
2951                 gmx_fatal(FARGS, "Single-component lambda in multi-component file %s", filename);
2952             }
2953         }
2954         else
2955         {
2956             for (i = 0; i < native_lambda->lc->N; i++)
2957             {
2958                 foreign_lambda->val[i] = blk->sub[0].dval[i + 2];
2959             }
2960         }
2961     }
2962     else
2963     {
2964         if (foreign_lambda->lc->N > 1)
2965         {
2966             if (blk->sub[1].nr < 3 + nhist)
2967             {
2968                 gmx_fatal(FARGS, "Missing derivative coord in multi-component file %s", filename);
2969             }
2970             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
2971         }
2972         else
2973         {
2974             foreign_lambda->dhdl = 0;
2975         }
2976     }
2977
2978     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL, filename);
2979     snew(s->hist, 1);
2980
2981     for (i = 0; i < nhist; i++)
2982     {
2983         nbins[i] = blk->sub[i + 2].nr;
2984     }
2985
2986     hist_init(s->hist, nhist, nbins);
2987
2988     for (i = 0; i < nhist; i++)
2989     {
2990         s->hist->x0[i] = blk->sub[1].lval[2 + i];
2991         s->hist->dx[i] = blk->sub[0].dval[1];
2992         if (i == 1)
2993         {
2994             s->hist->dx[i] = -s->hist->dx[i];
2995         }
2996     }
2997
2998     s->hist->start_time = start_time;
2999     s->hist->delta_time = delta_time;
3000     s->start_time       = start_time;
3001     s->delta_time       = delta_time;
3002
3003     for (i = 0; i < nhist; i++)
3004     {
3005         int64_t sum = 0;
3006
3007         for (j = 0; j < s->hist->nbin[i]; j++)
3008         {
3009             int binv = static_cast<int>(blk->sub[i + 2].ival[j]);
3010
3011             s->hist->bin[i][j] = binv;
3012             sum += binv;
3013         }
3014         if (i == 0)
3015         {
3016             s->ntot      = sum;
3017             s->hist->sum = sum;
3018         }
3019         else
3020         {
3021             if (s->ntot != sum)
3022             {
3023                 gmx_fatal(FARGS, "Histogram counts don't match in %s", filename);
3024             }
3025         }
3026     }
3027
3028     if (start_time + s->hist->sum * delta_time > *last_t)
3029     {
3030         *last_t = start_time + s->hist->sum * delta_time;
3031     }
3032     return s;
3033 }
3034
3035
3036 static void read_barsim_edr(const char* fn, real* temp, sim_data_t* sd)
3037 {
3038     int            i, j;
3039     ener_file_t    fp;
3040     t_enxframe*    fr;
3041     int            nre;
3042     gmx_enxnm_t*   enm           = nullptr;
3043     double         first_t       = -1;
3044     double         last_t        = -1;
3045     samples_t**    samples_rawdh = nullptr; /* contains samples for raw delta_h  */
3046     int*           nhists        = nullptr; /* array to keep count & print at end */
3047     int*           npts          = nullptr; /* array to keep count & print at end */
3048     lambda_vec_t** lambdas       = nullptr; /* array to keep count & print at end */
3049     lambda_vec_t*  native_lambda;
3050     int            nsamples = 0;
3051     lambda_vec_t   start_lambda;
3052
3053     fp = open_enx(fn, "r");
3054     do_enxnms(fp, &nre, &enm);
3055     snew(fr, 1);
3056
3057     snew(native_lambda, 1);
3058     start_lambda.lc  = nullptr;
3059     start_lambda.val = nullptr;
3060
3061     while (do_enx(fp, fr))
3062     {
3063         /* count the data blocks */
3064         int nblocks_raw  = 0;
3065         int nblocks_hist = 0;
3066         int nlam         = 0;
3067         int k;
3068         /* DHCOLL block information: */
3069         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3070         double rtemp = 0;
3071
3072         /* count the blocks and handle collection information: */
3073         for (i = 0; i < fr->nblock; i++)
3074         {
3075             if (fr->block[i].id == enxDHHIST)
3076             {
3077                 nblocks_hist++;
3078             }
3079             if (fr->block[i].id == enxDH)
3080             {
3081                 nblocks_raw++;
3082             }
3083             if (fr->block[i].id == enxDHCOLL)
3084             {
3085                 nlam++;
3086                 if ((fr->block[i].nsub < 1) || (fr->block[i].sub[0].type != xdr_datatype_double)
3087                     || (fr->block[i].sub[0].nr < 5))
3088                 {
3089                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3090                 }
3091
3092                 /* read the data from the DHCOLL block */
3093                 rtemp            = fr->block[i].sub[0].dval[0];
3094                 start_time       = fr->block[i].sub[0].dval[1];
3095                 delta_time       = fr->block[i].sub[0].dval[2];
3096                 old_start_lambda = fr->block[i].sub[0].dval[3];
3097                 delta_lambda     = fr->block[i].sub[0].dval[4];
3098
3099                 if (delta_lambda != 0)
3100                 {
3101                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3102                 }
3103                 if ((*temp != rtemp) && (*temp > 0))
3104                 {
3105                     gmx_fatal(FARGS,
3106                               "Temperature in file %s different from earlier files or setting\n",
3107                               fn);
3108                 }
3109                 *temp = rtemp;
3110
3111                 if (old_start_lambda >= 0)
3112                 {
3113                     if (sd->lc.N > 0)
3114                     {
3115                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3116                         {
3117                             gmx_fatal(FARGS,
3118                                       "lambda vector components in %s don't match those previously "
3119                                       "read",
3120                                       fn);
3121                         }
3122                     }
3123                     else
3124                     {
3125                         lambda_components_add(&(sd->lc), "", 0);
3126                     }
3127                     if (!start_lambda.lc)
3128                     {
3129                         lambda_vec_init(&start_lambda, &(sd->lc));
3130                     }
3131                     start_lambda.val[0] = old_start_lambda;
3132                 }
3133                 else
3134                 {
3135                     /* read lambda vector */
3136                     int      n_lambda_vec;
3137                     gmx_bool check = (sd->lc.N > 0);
3138                     if (fr->block[i].nsub < 2)
3139                     {
3140                         gmx_fatal(FARGS, "No lambda vector, but start_lambda=%f\n", old_start_lambda);
3141                     }
3142                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3143                     for (j = 0; j < n_lambda_vec; j++)
3144                     {
3145                         const char* name = efpt_singular_names[fr->block[i].sub[1].ival[1 + j]];
3146                         if (check)
3147                         {
3148                             /* check the components */
3149                             lambda_components_check(&(sd->lc), j, name, std::strlen(name));
3150                         }
3151                         else
3152                         {
3153                             lambda_components_add(&(sd->lc), name, std::strlen(name));
3154                         }
3155                     }
3156                     lambda_vec_init(&start_lambda, &(sd->lc));
3157                     start_lambda.index = fr->block[i].sub[1].ival[0];
3158                     for (j = 0; j < n_lambda_vec; j++)
3159                     {
3160                         start_lambda.val[j] = fr->block[i].sub[0].dval[5 + j];
3161                     }
3162                 }
3163                 if (first_t < 0)
3164                 {
3165                     first_t = start_time;
3166                 }
3167             }
3168         }
3169
3170         if (nlam != 1)
3171         {
3172             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3173         }
3174         if (nblocks_raw > 0 && nblocks_hist > 0)
3175         {
3176             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3177         }
3178
3179         if (nsamples == 0)
3180         {
3181             /* this is the first round; allocate the associated data
3182                structures */
3183             /*native_lambda=start_lambda;*/
3184             lambda_vec_init(native_lambda, &(sd->lc));
3185             lambda_vec_copy(native_lambda, &start_lambda);
3186             nsamples = nblocks_raw + nblocks_hist;
3187             snew(nhists, nsamples);
3188             snew(npts, nsamples);
3189             snew(lambdas, nsamples);
3190             snew(samples_rawdh, nsamples);
3191             for (i = 0; i < nsamples; i++)
3192             {
3193                 nhists[i]        = 0;
3194                 npts[i]          = 0;
3195                 lambdas[i]       = nullptr;
3196                 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3197                                                ones contain values */
3198             }
3199         }
3200         else
3201         {
3202             // nsamples > 0 means this is NOT the first iteration
3203
3204             /* check the native lambda */
3205             if (!lambda_vec_same(&start_lambda, native_lambda))
3206             {
3207                 gmx_fatal(FARGS,
3208                           "Native lambda not constant in file %s: started at %f, and becomes %f at "
3209                           "time %f",
3210                           fn,
3211                           native_lambda->val[0],
3212                           start_lambda.val[0],
3213                           start_time);
3214             }
3215             /* check the number of samples against the previous number */
3216             if (((nblocks_raw + nblocks_hist) != nsamples) || (nlam != 1))
3217             {
3218                 gmx_fatal(FARGS,
3219                           "Unexpected block count in %s: was %d, now %d\n",
3220                           fn,
3221                           nsamples + 1,
3222                           nblocks_raw + nblocks_hist + nlam);
3223             }
3224             /* check whether last iterations's end time matches with
3225                the currrent start time */
3226             if ((std::abs(last_t - start_time) > 2 * delta_time) && last_t >= 0)
3227             {
3228                 /* it didn't. We need to store our samples and reallocate */
3229                 for (i = 0; i < nsamples; i++)
3230                 {
3231                     if (samples_rawdh[i])
3232                     {
3233                         /* insert it into the existing list */
3234                         lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3235                         /* and make sure we'll allocate a new one this time
3236                            around */
3237                         samples_rawdh[i] = nullptr;
3238                     }
3239                 }
3240             }
3241         }
3242
3243         /* and read them */
3244         k = 0; /* counter for the lambdas, etc. arrays */
3245         for (i = 0; i < fr->nblock; i++)
3246         {
3247             if (fr->block[i].id == enxDH)
3248             {
3249                 int type = (fr->block[i].sub[0].ival[0]);
3250                 if (type == dhbtDH || type == dhbtDHDL)
3251                 {
3252                     int ndu;
3253                     read_edr_rawdh_block(&(samples_rawdh[k]),
3254                                          &ndu,
3255                                          &(fr->block[i]),
3256                                          start_time,
3257                                          delta_time,
3258                                          native_lambda,
3259                                          rtemp,
3260                                          &last_t,
3261                                          fn);
3262                     npts[k] += ndu;
3263                     if (samples_rawdh[k])
3264                     {
3265                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3266                     }
3267                     k++;
3268                 }
3269             }
3270             else if (fr->block[i].id == enxDHHIST)
3271             {
3272                 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3273                 if (type == dhbtDH || type == dhbtDHDL)
3274                 {
3275                     int        j;
3276                     int        nb = 0;
3277                     samples_t* s; /* this is where the data will go */
3278                     s = read_edr_hist_block(
3279                             &nb, &(fr->block[i]), start_time, delta_time, native_lambda, rtemp, &last_t, fn);
3280                     nhists[k] += nb;
3281                     if (nb > 0)
3282                     {
3283                         lambdas[k] = s->foreign_lambda;
3284                     }
3285                     k++;
3286                     /* and insert the new sample immediately */
3287                     for (j = 0; j < nb; j++)
3288                     {
3289                         lambda_data_list_insert_sample(sd->lb, s + j);
3290                     }
3291                 }
3292             }
3293         }
3294     }
3295     /* Now store all our extant sample collections */
3296     for (i = 0; i < nsamples; i++)
3297     {
3298         if (samples_rawdh[i])
3299         {
3300             /* insert it into the existing list */
3301             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3302         }
3303     }
3304
3305
3306     {
3307         char buf[STRLEN];
3308         printf("\n");
3309         lambda_vec_print(native_lambda, buf, FALSE);
3310         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n", fn, first_t, last_t, buf);
3311         for (i = 0; i < nsamples; i++)
3312         {
3313             if (lambdas[i])
3314             {
3315                 lambda_vec_print(lambdas[i], buf, TRUE);
3316                 if (nhists[i] > 0)
3317                 {
3318                     printf("        %s (%d hists)\n", buf, nhists[i]);
3319                 }
3320                 else
3321                 {
3322                     printf("        %s (%d pts)\n", buf, npts[i]);
3323                 }
3324             }
3325         }
3326     }
3327     printf("\n\n");
3328     sfree(npts);
3329     sfree(nhists);
3330     sfree(lambdas);
3331 }
3332
3333
3334 int gmx_bar(int argc, char* argv[])
3335 {
3336     static const char* desc[] = {
3337         "[THISMODULE] calculates free energy difference estimates through ",
3338         "Bennett's acceptance ratio method (BAR). It also automatically",
3339         "adds series of individual free energies obtained with BAR into",
3340         "a combined free energy estimate.[PAR]",
3341
3342         "Every individual BAR free energy difference relies on two ",
3343         "simulations at different states: say state A and state B, as",
3344         "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3345         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3346         "average of the Hamiltonian difference of state B given state A and",
3347         "vice versa.",
3348         "The energy differences to the other state must be calculated",
3349         "explicitly during the simulation. This can be done with",
3350         "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3351
3352         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3353         "Two types of input files are supported:",
3354         "",
3355         " * Files with more than one [IT]y[it]-value. ",
3356         "   The files should have columns ",
3357         "   with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3358         "   The [GRK]lambda[grk] values are inferred ",
3359         "   from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3360         "   dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3361         "   legends of Delta H",
3362         " * Files with only one [IT]y[it]-value. Using the",
3363         "   [TT]-extp[tt] option for these files, it is assumed",
3364         "   that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3365         "   Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3366         "   The [GRK]lambda[grk] value of the simulation is inferred from the ",
3367         "   subtitle (if present), otherwise from a number in the subdirectory ",
3368         "   in the file name.",
3369         "",
3370
3371         "The [GRK]lambda[grk] of the simulation is parsed from ",
3372         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3373         "foreign [GRK]lambda[grk] values from the legend containing the ",
3374         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3375         "the legend line containing 'T ='.[PAR]",
3376
3377         "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3378         "These can contain either lists of energy differences (see the ",
3379         "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3380         "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3381         "[TT]dh_hist_spacing[tt]).",
3382         "The temperature and [GRK]lambda[grk] ",
3383         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3384
3385         "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3386         "the energy difference can also be extrapolated from the ",
3387         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3388         "option, which assumes that the system's Hamiltonian depends linearly",
3389         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3390
3391         "The free energy estimates are determined using BAR with bisection, ",
3392         "with the precision of the output set with [TT]-prec[tt]. ",
3393         "An error estimate taking into account time correlations ",
3394         "is made by splitting the data into blocks and determining ",
3395         "the free energy differences over those blocks and assuming ",
3396         "the blocks are independent. ",
3397         "The final error estimate is determined from the average variance ",
3398         "over 5 blocks. A range of block numbers for error estimation can ",
3399         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3400
3401         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3402         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3403         "samples. [BB]Note[bb] that when aggregating energy ",
3404         "differences/derivatives with different sampling intervals, this is ",
3405         "almost certainly not correct. Usually subsequent energies are ",
3406         "correlated and different time intervals mean different degrees ",
3407         "of correlation between samples.[PAR]",
3408
3409         "The results are split in two parts: the last part contains the final ",
3410         "results in kJ/mol, together with the error estimate for each part ",
3411         "and the total. The first part contains detailed free energy ",
3412         "difference estimates and phase space overlap measures in units of ",
3413         "kT (together with their computed error estimate). The printed ",
3414         "values are:",
3415         "",
3416         " * lam_A: the [GRK]lambda[grk] values for point A.",
3417         " * lam_B: the [GRK]lambda[grk] values for point B.",
3418         " *    DG: the free energy estimate.",
3419         " *   s_A: an estimate of the relative entropy of B in A.",
3420         " *   s_B: an estimate of the relative entropy of A in B.",
3421         " * stdev: an estimate expected per-sample standard deviation.",
3422         "",
3423
3424         "The relative entropy of both states in each other's ensemble can be ",
3425         "interpreted as a measure of phase space overlap: ",
3426         "the relative entropy s_A of the work samples of lambda_B in the ",
3427         "ensemble of lambda_A (and vice versa for s_B), is a ",
3428         "measure of the 'distance' between Boltzmann distributions of ",
3429         "the two states, that goes to zero for identical distributions. See ",
3430         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3431         "[PAR]",
3432         "The estimate of the expected per-sample standard deviation, as given ",
3433         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3434         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3435         "of the actual statistical error, because it assumes independent samples).[PAR]",
3436
3437         "To get a visual estimate of the phase space overlap, use the ",
3438         "[TT]-oh[tt] option to write series of histograms, together with the ",
3439         "[TT]-nbin[tt] option.[PAR]"
3440     };
3441     static real begin = 0, end = -1, temp = -1;
3442     int         nd = 2, nbmin = 5, nbmax = 5;
3443     int         nbin     = 100;
3444     gmx_bool    use_dhdl = FALSE;
3445     t_pargs     pa[]     = {
3446         { "-b", FALSE, etREAL, { &begin }, "Begin time for BAR" },
3447         { "-e", FALSE, etREAL, { &end }, "End time for BAR" },
3448         { "-temp", FALSE, etREAL, { &temp }, "Temperature (K)" },
3449         { "-prec", FALSE, etINT, { &nd }, "The number of digits after the decimal point" },
3450         { "-nbmin", FALSE, etINT, { &nbmin }, "Minimum number of blocks for error estimation" },
3451         { "-nbmax", FALSE, etINT, { &nbmax }, "Maximum number of blocks for error estimation" },
3452         { "-nbin", FALSE, etINT, { &nbin }, "Number of bins for histogram output" },
3453         { "-extp",
3454           FALSE,
3455           etBOOL,
3456           { &use_dhdl },
3457           "Whether to linearly extrapolate dH/dl values to use as energies" }
3458     };
3459
3460     t_filenm fnm[] = { { efXVG, "-f", "dhdl", ffOPTRDMULT },
3461                        { efEDR, "-g", "ener", ffOPTRDMULT },
3462                        { efXVG, "-o", "bar", ffOPTWR },
3463                        { efXVG, "-oi", "barint", ffOPTWR },
3464                        { efXVG, "-oh", "histogram", ffOPTWR } };
3465 #define NFILE asize(fnm)
3466
3467     int        f;
3468     int        nf = 0;    /* file counter */
3469     int        nfile_tot; /* total number of input files */
3470     sim_data_t sim_data;  /* the simulation data */
3471     barres_t*  results;   /* the results */
3472     int        nresults;  /* number of results in results array */
3473
3474     double*           partsum;
3475     double            prec, dg_tot;
3476     FILE *            fpb, *fpi;
3477     char              dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3478     char              buf[STRLEN], buf2[STRLEN];
3479     char              ktformat[STRLEN], sktformat[STRLEN];
3480     char              kteformat[STRLEN], skteformat[STRLEN];
3481     gmx_output_env_t* oenv;
3482     double            kT;
3483     gmx_bool          result_OK = TRUE, bEE = TRUE;
3484
3485     gmx_bool disc_err          = FALSE;
3486     double   sum_disc_err      = 0.; /* discretization error */
3487     gmx_bool histrange_err     = FALSE;
3488     double   sum_histrange_err = 0.; /* histogram range error */
3489     double   stat_err          = 0.; /* statistical error */
3490
3491     if (!parse_common_args(
3492                 &argc, argv, PCA_CAN_VIEW, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3493     {
3494         return 0;
3495     }
3496
3497     gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3498     gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3499
3500     sim_data_init(&sim_data);
3501 #if 0
3502     /* make linked list */
3503     lb = &lambda_head;
3504     lambda_data_init(lb, 0, 0);
3505     lb->next = lb;
3506     lb->prev = lb;
3507 #endif
3508
3509
3510     nfile_tot = xvgFiles.size() + edrFiles.size();
3511
3512     if (nfile_tot == 0)
3513     {
3514         gmx_fatal(FARGS, "No input files!");
3515     }
3516
3517     if (nd < 0)
3518     {
3519         gmx_fatal(FARGS, "Can not have negative number of digits");
3520     }
3521     prec = std::pow(10.0, static_cast<double>(-nd));
3522
3523     snew(partsum, (nbmax + 1) * (nbmax + 1));
3524     nf = 0;
3525
3526     /* read in all files. First xvg files */
3527     for (const std::string& filenm : xvgFiles)
3528     {
3529         read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3530         nf++;
3531     }
3532     /* then .edr files */
3533     for (const std::string& filenm : edrFiles)
3534     {
3535         read_barsim_edr(filenm.c_str(), &temp, &sim_data);
3536
3537         nf++;
3538     }
3539
3540     /* fix the times to allow for equilibration */
3541     sim_data_impose_times(&sim_data, begin, end);
3542
3543     if (opt2bSet("-oh", NFILE, fnm))
3544     {
3545         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3546     }
3547
3548     /* assemble the output structures from the lambdas */
3549     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3550
3551     sum_disc_err = barres_list_max_disc_err(results, nresults);
3552
3553     if (nresults == 0)
3554     {
3555         printf("\nNo results to calculate.\n");
3556         return 0;
3557     }
3558
3559     if (sum_disc_err > prec)
3560     {
3561         prec = sum_disc_err;
3562         nd   = static_cast<int>(std::ceil(-std::log10(prec)));
3563         printf("WARNING: setting the precision to %g because that is the minimum\n         "
3564                "reasonable number, given the expected discretization error.\n",
3565                prec);
3566     }
3567
3568
3569     /*sprintf(lamformat,"%%6.3f");*/
3570     sprintf(dgformat, "%%%d.%df", 3 + nd, nd);
3571     /* the format strings of the results in kT */
3572     sprintf(ktformat, "%%%d.%df", 5 + nd, nd);
3573     sprintf(sktformat, "%%%ds", 6 + nd);
3574     /* the format strings of the errors in kT */
3575     sprintf(kteformat, "%%%d.%df", 3 + nd, nd);
3576     sprintf(skteformat, "%%%ds", 4 + nd);
3577     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3578     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3579
3580
3581     fpb = nullptr;
3582     if (opt2bSet("-o", NFILE, fnm))
3583     {
3584         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3585         fpb = xvgropen_type(
3586                 opt2fn("-o", NFILE, fnm), "Free energy differences", "\\lambda", buf, exvggtXYDY, oenv);
3587     }
3588
3589     fpi = nullptr;
3590     if (opt2bSet("-oi", NFILE, fnm))
3591     {
3592         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3593         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral", "\\lambda", buf, oenv);
3594     }
3595
3596
3597     if (nbmin > nbmax)
3598     {
3599         nbmin = nbmax;
3600     }
3601
3602     /* first calculate results */
3603     bEE      = TRUE;
3604     disc_err = FALSE;
3605     for (f = 0; f < nresults; f++)
3606     {
3607         /* Determine the free energy difference with a factor of 10
3608          * more accuracy than requested for printing.
3609          */
3610         calc_bar(&(results[f]), 0.1 * prec, nbmin, nbmax, &bEE, partsum);
3611
3612         if (results[f].dg_disc_err > prec / 10.)
3613         {
3614             disc_err = TRUE;
3615         }
3616         if (results[f].dg_histrange_err > prec / 10.)
3617         {
3618             histrange_err = TRUE;
3619         }
3620     }
3621
3622     /* print results in kT */
3623     kT = BOLTZ * temp;
3624
3625     printf("\nTemperature: %g K\n", temp);
3626
3627     printf("\nDetailed results in kT (see help for explanation):\n\n");
3628     printf("%6s ", " lam_A");
3629     printf("%6s ", " lam_B");
3630     printf(sktformat, "DG ");
3631     if (bEE)
3632     {
3633         printf(skteformat, "+/- ");
3634     }
3635     if (disc_err)
3636     {
3637         printf(skteformat, "disc ");
3638     }
3639     if (histrange_err)
3640     {
3641         printf(skteformat, "range ");
3642     }
3643     printf(sktformat, "s_A ");
3644     if (bEE)
3645     {
3646         printf(skteformat, "+/- ");
3647     }
3648     printf(sktformat, "s_B ");
3649     if (bEE)
3650     {
3651         printf(skteformat, "+/- ");
3652     }
3653     printf(sktformat, "stdev ");
3654     if (bEE)
3655     {
3656         printf(skteformat, "+/- ");
3657     }
3658     printf("\n");
3659     for (f = 0; f < nresults; f++)
3660     {
3661         lambda_vec_print_short(results[f].a->native_lambda, buf);
3662         printf("%s ", buf);
3663         lambda_vec_print_short(results[f].b->native_lambda, buf);
3664         printf("%s ", buf);
3665         printf(ktformat, results[f].dg);
3666         printf(" ");
3667         if (bEE)
3668         {
3669             printf(kteformat, results[f].dg_err);
3670             printf(" ");
3671         }
3672         if (disc_err)
3673         {
3674             printf(kteformat, results[f].dg_disc_err);
3675             printf(" ");
3676         }
3677         if (histrange_err)
3678         {
3679             printf(kteformat, results[f].dg_histrange_err);
3680             printf(" ");
3681         }
3682         printf(ktformat, results[f].sa);
3683         printf(" ");
3684         if (bEE)
3685         {
3686             printf(kteformat, results[f].sa_err);
3687             printf(" ");
3688         }
3689         printf(ktformat, results[f].sb);
3690         printf(" ");
3691         if (bEE)
3692         {
3693             printf(kteformat, results[f].sb_err);
3694             printf(" ");
3695         }
3696         printf(ktformat, results[f].dg_stddev);
3697         printf(" ");
3698         if (bEE)
3699         {
3700             printf(kteformat, results[f].dg_stddev_err);
3701         }
3702         printf("\n");
3703
3704         /* Check for negative relative entropy with a 95% certainty. */
3705         if (results[f].sa < -2 * results[f].sa_err || results[f].sb < -2 * results[f].sb_err)
3706         {
3707             result_OK = FALSE;
3708         }
3709     }
3710
3711     if (!result_OK)
3712     {
3713         printf("\nWARNING: Some of these results violate the Second Law of "
3714                "Thermodynamics: \n"
3715                "         This is can be the result of severe undersampling, or "
3716                "(more likely)\n"
3717                "         there is something wrong with the simulations.\n");
3718     }
3719
3720
3721     /* final results in kJ/mol */
3722     printf("\n\nFinal results in kJ/mol:\n\n");
3723     dg_tot = 0;
3724     for (f = 0; f < nresults; f++)
3725     {
3726
3727         if (fpi != nullptr)
3728         {
3729             lambda_vec_print_short(results[f].a->native_lambda, buf);
3730             fprintf(fpi, xvg2format, buf, dg_tot);
3731         }
3732
3733
3734         if (fpb != nullptr)
3735         {
3736             lambda_vec_print_intermediate(results[f].a->native_lambda, results[f].b->native_lambda, buf);
3737
3738             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3739         }
3740
3741         printf("point ");
3742         lambda_vec_print_short(results[f].a->native_lambda, buf);
3743         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3744         printf("%s - %s", buf, buf2);
3745         printf(",   DG ");
3746
3747         printf(dgformat, results[f].dg * kT);
3748         if (bEE)
3749         {
3750             printf(" +/- ");
3751             printf(dgformat, results[f].dg_err * kT);
3752         }
3753         if (histrange_err)
3754         {
3755             printf(" (max. range err. = ");
3756             printf(dgformat, results[f].dg_histrange_err * kT);
3757             printf(")");
3758             sum_histrange_err += results[f].dg_histrange_err * kT;
3759         }
3760
3761         printf("\n");
3762         dg_tot += results[f].dg;
3763     }
3764     printf("\n");
3765     printf("total ");
3766     lambda_vec_print_short(results[0].a->native_lambda, buf);
3767     lambda_vec_print_short(results[nresults - 1].b->native_lambda, buf2);
3768     printf("%s - %s", buf, buf2);
3769     printf(",   DG ");
3770
3771     printf(dgformat, dg_tot * kT);
3772     if (bEE)
3773     {
3774         stat_err = bar_err(nbmin, nbmax, partsum) * kT;
3775         printf(" +/- ");
3776         printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3777     }
3778     printf("\n");
3779     if (disc_err)
3780     {
3781         printf("\nmaximum discretization error = ");
3782         printf(dgformat, sum_disc_err);
3783         if (bEE && stat_err < sum_disc_err)
3784         {
3785             printf("WARNING: discretization error (%g) is larger than statistical error.\n       "
3786                    "Decrease histogram spacing for more accurate results\n",
3787                    stat_err);
3788         }
3789     }
3790     if (histrange_err)
3791     {
3792         printf("\nmaximum histogram range error = ");
3793         printf(dgformat, sum_histrange_err);
3794         if (bEE && stat_err < sum_histrange_err)
3795         {
3796             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       "
3797                    "Increase histogram range for more accurate results\n",
3798                    stat_err);
3799         }
3800     }
3801     printf("\n");
3802
3803
3804     if (fpi != nullptr)
3805     {
3806         lambda_vec_print_short(results[nresults - 1].b->native_lambda, buf);
3807         fprintf(fpi, xvg2format, buf, dg_tot);
3808         xvgrclose(fpi);
3809     }
3810     if (fpb != nullptr)
3811     {
3812         xvgrclose(fpb);
3813     }
3814
3815     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3816     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3817
3818     return 0;
3819 }