src/gromacs/gmxana/gmx_bar.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 #include "gmxpre.h"
  36
  37 #include <cctype>
  38 #include <cmath>
  39 #include <cstdlib>
  40 #include <cstring>
  41
  42 #include <algorithm>
  43 #include <limits>
  44
  45 #include "gromacs/commandline/pargs.h"
  46 #include "gromacs/commandline/viewit.h"
  47 #include "gromacs/fileio/enxio.h"
  48 #include "gromacs/fileio/xvgr.h"
  49 #include "gromacs/gmxana/gmx_ana.h"
  50 #include "gromacs/math/units.h"
  51 #include "gromacs/math/utilities.h"
  52 #include "gromacs/mdlib/mdebin.h"
  53 #include "gromacs/mdtypes/md_enums.h"
  54 #include "gromacs/utility/arraysize.h"
  55 #include "gromacs/utility/cstringutil.h"
  56 #include "gromacs/utility/dir_separator.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/gmxassert.h"
  59 #include "gromacs/utility/smalloc.h"
  60 #include "gromacs/utility/snprintf.h"
  61
  62
  63 /* Structure for the names of lambda vector components */
  64 typedef struct lambda_components_t
  65 {
  66     char **names;  /* Array of strings with names for the lambda vector
  67                       components */
  68     int    N;      /* The number of components */
  69     int    Nalloc; /* The number of allocated components */
  70 } lambda_components_t;
  71
  72 /* Structure for a lambda vector or a dhdl derivative direction */
  73 typedef struct lambda_vec_t
  74 {
  75     double                    *val;   /* The lambda vector component values. Only valid if
  76                                          dhdl == -1 */
  77     int                        dhdl;  /* The coordinate index for the derivative described by this
  78                                          structure, or -1 */
  79     const lambda_components_t *lc;    /* the associated lambda_components
  80                                          structure */
  81     int                        index; /* The state number (init-lambda-state) of this lambda
  82                                          vector, if known. If not, it is set to -1 */
  83 } lambda_vec_t;
  84
  85 /* the dhdl.xvg data from a simulation */
  86 typedef struct xvg_t
  87 {
  88     const char   *filename;
  89     int           ftp;           /* file type */
  90     int           nset;          /* number of lambdas, including dhdl */
  91     int          *np;            /* number of data points (du or hists) per lambda */
  92     int           np_alloc;      /* number of points (du or hists) allocated */
  93     double        temp;          /* temperature */
  94     lambda_vec_t *lambda;        /* the lambdas (of first index for y). */
  95     double       *t;             /* the times (of second index for y) */
  96     double      **y;             /* the dU values. y[0] holds the derivative, while
  97                                     further ones contain the energy differences between
  98                                     the native lambda and the 'foreign' lambdas. */
  99     lambda_vec_t  native_lambda; /* the native lambda */
 100
 101     struct xvg_t *next, *prev;   /*location in the global linked list of xvg_ts*/
 102 } xvg_t;
 103
 104
 105 typedef struct hist_t
 106 {
 107     unsigned int   *bin[2];                 /* the (forward + reverse) histogram values */
 108     double          dx[2];                  /* the histogram spacing. The reverse
 109                                                dx is the negative of the forward dx.*/
 110     gmx_int64_t     x0[2];                  /* the (forward + reverse) histogram start
 111                                                    point(s) as int */
 112
 113     int             nbin[2];                /* the (forward+reverse) number of bins */
 114     gmx_int64_t     sum;                    /* the total number of counts. Must be
 115                                                    the same for forward + reverse.  */
 116     int             nhist;                  /* number of hist datas (forward or reverse) */
 117
 118     double          start_time, delta_time; /* start time, end time of histogram */
 119 } hist_t;
 120
 121
 122 /* an aggregate of samples for partial free energy calculation */
 123 typedef struct samples_t
 124 {
 125     lambda_vec_t *native_lambda;  /* pointer to native lambda vector */
 126     lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
 127     double        temp;           /* the temperature */
 128     gmx_bool      derivative;     /* whether this sample is a derivative */
 129
 130     /* The samples come either as either delta U lists: */
 131     int     ndu;                    /* the number of delta U samples */
 132     double *du;                     /* the delta u's */
 133     double *t;                      /* the times associated with those samples, or: */
 134     double  start_time, delta_time; /*start time and delta time for linear time*/
 135
 136     /* or as histograms: */
 137     hist_t *hist; /* a histogram */
 138
 139     /* allocation data: (not NULL for data 'owned' by this struct) */
 140     double         *du_alloc, *t_alloc;  /* allocated delta u arrays  */
 141     size_t          ndu_alloc, nt_alloc; /* pre-allocated sizes */
 142     hist_t         *hist_alloc;          /* allocated hist */
 143
 144     gmx_int64_t     ntot;                /* total number of samples */
 145     const char     *filename;            /* the file name this sample comes from */
 146 } samples_t;
 147
 148 /* a sample range (start to end for du-style data, or boolean
 149     for both du-style data and histograms */
 150 typedef struct sample_range_t
 151 {
 152     int        start, end; /* start and end index for du style data */
 153     gmx_bool   use;        /* whether to use this sample */
 154
 155     samples_t *s;          /* the samples this range belongs to */
 156 } sample_range_t;
 157
 158
 159 /* a collection of samples for a partial free energy calculation
 160     (i.e. the collection of samples from one native lambda to one
 161     foreign lambda) */
 162 typedef struct sample_coll_t
 163 {
 164     lambda_vec_t   *native_lambda;     /* these should be the same for all samples
 165                                           in the histogram */
 166     lambda_vec_t   *foreign_lambda;    /* collection */
 167     double          temp;              /* the temperature */
 168
 169     int             nsamples;          /* the number of samples */
 170     samples_t     **s;                 /* the samples themselves */
 171     sample_range_t *r;                 /* the sample ranges */
 172     int             nsamples_alloc;    /* number of allocated samples */
 173
 174     gmx_int64_t     ntot;              /* total number of samples in the ranges of
 175                                               this collection */
 176
 177     struct sample_coll_t *next, *prev; /* next and previous in the list */
 178 } sample_coll_t;
 179
 180 /* all the samples associated with a lambda point */
 181 typedef struct lambda_data_t
 182 {
 183     lambda_vec_t         *lambda;      /* the native lambda (at start time if dynamic) */
 184     double                temp;        /* temperature */
 185
 186     sample_coll_t        *sc;          /* the samples */
 187
 188     sample_coll_t         sc_head;     /*the pre-allocated list head for the linked list.*/
 189
 190     struct lambda_data_t *next, *prev; /* the next and prev in the list */
 191 } lambda_data_t;
 192
 193 /* Top-level data structure of simulation data */
 194 typedef struct sim_data_t
 195 {
 196     lambda_data_t      *lb;      /* a lambda data linked list */
 197     lambda_data_t       lb_head; /* The head element of the linked list */
 198
 199     lambda_components_t lc;      /* the allowed components of the lambda
 200                                     vectors */
 201 } sim_data_t;
 202
 203 /* Top-level data structure with calculated values. */
 204 typedef struct {
 205     sample_coll_t *a, *b;            /* the simulation data */
 206
 207     double         dg;               /* the free energy difference */
 208     double         dg_err;           /* the free energy difference */
 209
 210     double         dg_disc_err;      /* discretization error */
 211     double         dg_histrange_err; /* histogram range error */
 212
 213     double         sa;               /* relative entropy of b in state a */
 214     double         sa_err;           /* error in sa */
 215     double         sb;               /* relative entropy of a in state b */
 216     double         sb_err;           /* error in sb */
 217
 218     double         dg_stddev;        /* expected dg stddev per sample */
 219     double         dg_stddev_err;    /* error in dg_stddev */
 220 } barres_t;
 221
 222
 223 /* Initialize a lambda_components structure */
 224 static void lambda_components_init(lambda_components_t *lc)
 225 {
 226     lc->N      = 0;
 227     lc->Nalloc = 2;
 228     snew(lc->names, lc->Nalloc);
 229 }
 230
 231 /* Add a component to a lambda_components structure */
 232 static void lambda_components_add(lambda_components_t *lc,
 233                                   const char *name, size_t name_length)
 234 {
 235     while (lc->N + 1 > lc->Nalloc)
 236     {
 237         lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
 238         srenew(lc->names, lc->Nalloc);
 239     }
 240     snew(lc->names[lc->N], name_length+1);
 241     std::strncpy(lc->names[lc->N], name, name_length);
 242     lc->N++;
 243 }
 244
 245 /* check whether a component with index 'index' matches the given name, or
 246    is also NULL. Returns TRUE if this is the case.
 247    the string name does not need to end */
 248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
 249                                         int                        index,
 250                                         const char                *name,
 251                                         size_t                     name_length)
 252 {
 253     size_t len;
 254     if (!lc || index >= lc->N)
 255     {
 256         return FALSE;
 257     }
 258     if (name == nullptr && lc->names[index] == nullptr)
 259     {
 260         return TRUE;
 261     }
 262     if ((name == nullptr) != (lc->names[index] == nullptr))
 263     {
 264         return FALSE;
 265     }
 266     len = std::strlen(lc->names[index]);
 267     if (len != name_length)
 268     {
 269         return FALSE;
 270     }
 271     return std::strncmp(lc->names[index], name, name_length) == 0;
 272 }
 273
 274 /* Find the index of a given lambda component name, or -1 if not found */
 275 static int lambda_components_find(const lambda_components_t *lc,
 276                                   const char                *name,
 277                                   size_t                     name_length)
 278 {
 279     int i;
 280
 281     for (i = 0; i < lc->N; i++)
 282     {
 283         if (std::strncmp(lc->names[i], name, name_length) == 0)
 284         {
 285             return i;
 286         }
 287     }
 288     return -1;
 289 }
 290
 291
 292
 293 /* initialize a lambda vector */
 294 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
 295 {
 296     snew(lv->val, lc->N);
 297     lv->index = -1;
 298     lv->dhdl  = -1;
 299     lv->lc    = lc;
 300 }
 301
 302 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
 303 {
 304     int i;
 305
 306     lambda_vec_init(lv, orig->lc);
 307     lv->dhdl  = orig->dhdl;
 308     lv->index = orig->index;
 309     for (i = 0; i < lv->lc->N; i++)
 310     {
 311         lv->val[i] = orig->val[i];
 312     }
 313 }
 314
 315 /* write a lambda vec to a preallocated string */
 316 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
 317 {
 318     int    i;
 319
 320     str[0] = 0; /* reset the string */
 321     if (lv->dhdl < 0)
 322     {
 323         if (named)
 324         {
 325             str += sprintf(str, "delta H to ");
 326         }
 327         if (lv->lc->N > 1)
 328         {
 329             str += sprintf(str, "(");
 330         }
 331         for (i = 0; i < lv->lc->N; i++)
 332         {
 333             str += sprintf(str, "%g", lv->val[i]);
 334             if (i < lv->lc->N-1)
 335             {
 336                 str += sprintf(str, ", ");
 337             }
 338         }
 339         if (lv->lc->N > 1)
 340         {
 341             sprintf(str, ")");
 342         }
 343     }
 344     else
 345     {
 346         /* this lambda vector describes a derivative */
 347         str += sprintf(str, "dH/dl");
 348         if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
 349         {
 350             sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
 351         }
 352     }
 353 }
 354
 355 /* write a shortened version of the lambda vec to a preallocated string */
 356 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
 357 {
 358     if (lv->index >= 0)
 359     {
 360         sprintf(str, "%6d", lv->index);
 361     }
 362     else
 363     {
 364         if (lv->dhdl < 0)
 365         {
 366             sprintf(str, "%6.3f", lv->val[0]);
 367         }
 368         else
 369         {
 370             sprintf(str, "dH/dl[%d]", lv->dhdl);
 371         }
 372     }
 373 }
 374
 375 /* write an intermediate version of two lambda vecs to a preallocated string */
 376 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
 377                                           const lambda_vec_t *b, char *str)
 378 {
 379     str[0] = 0;
 380     if ( (a->index >= 0) && (b->index >= 0) )
 381     {
 382         sprintf(str, "%6.3f", (a->index+b->index)/2.0);
 383     }
 384     else
 385     {
 386         if ( (a->dhdl < 0) && (b->dhdl < 0) )
 387         {
 388             sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
 389         }
 390     }
 391 }
 392
 393
 394 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
 395    a and b must describe non-derivative lambda points */
 396 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
 397 {
 398     int    i;
 399     double ret = 0.;
 400
 401     if ( (a->dhdl > 0)  || (b->dhdl > 0) )
 402     {
 403         gmx_fatal(FARGS,
 404                   "Trying to calculate the difference between derivatives instead of lambda points");
 405     }
 406     if (a->lc != b->lc)
 407     {
 408         gmx_fatal(FARGS,
 409                   "Trying to calculate the difference lambdas with differing basis set");
 410     }
 411     for (i = 0; i < a->lc->N; i++)
 412     {
 413         double df = a->val[i] - b->val[i];
 414         ret += df*df;
 415     }
 416     return std::sqrt(ret);
 417 }
 418
 419
 420 /* check whether two lambda vectors are the same */
 421 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
 422 {
 423     int i;
 424
 425     if (a->lc != b->lc)
 426     {
 427         return FALSE;
 428     }
 429     if (a->dhdl < 0)
 430     {
 431         for (i = 0; i < a->lc->N; i++)
 432         {
 433             if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 434             {
 435                 return FALSE;
 436             }
 437         }
 438         return TRUE;
 439     }
 440     else
 441     {
 442         /* they're derivatives, so we check whether the indices match */
 443         return (a->dhdl == b->dhdl);
 444     }
 445 }
 446
 447 /* Compare the sort order of two foreign lambda vectors
 448
 449     returns 1 if a is 'bigger' than b,
 450     returns 0 if they're the same,
 451     returns -1 if a is 'smaller' than b.*/
 452 static int lambda_vec_cmp_foreign(const lambda_vec_t *a,
 453                                   const lambda_vec_t *b)
 454 {
 455     int      i;
 456     double   norm_a    = 0, norm_b = 0;
 457     gmx_bool different = FALSE;
 458
 459     if (a->lc != b->lc)
 460     {
 461         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 462     }
 463     /* if either one has an index we sort based on that */
 464     if ((a->index >= 0) || (b->index >= 0))
 465     {
 466         if (a->index == b->index)
 467         {
 468             return 0;
 469         }
 470         return (a->index > b->index) ? 1 : -1;
 471     }
 472     if (a->dhdl >= 0 || b->dhdl >= 0)
 473     {
 474         /* lambda vectors that are derivatives always sort higher than those
 475            without derivatives */
 476         if ((a->dhdl >= 0)  != (b->dhdl >= 0) )
 477         {
 478             return (a->dhdl >= 0) ? 1 : -1;
 479         }
 480         return a->dhdl > b->dhdl;
 481     }
 482
 483     /* neither has an index, so we can only sort on the lambda components,
 484        which is only valid if there is one component */
 485     for (i = 0; i < a->lc->N; i++)
 486     {
 487         if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
 488         {
 489             different = TRUE;
 490         }
 491         norm_a += a->val[i]*a->val[i];
 492         norm_b += b->val[i]*b->val[i];
 493     }
 494     if (!different)
 495     {
 496         return 0;
 497     }
 498     return norm_a > norm_b;
 499 }
 500
 501 /* Compare the sort order of two native lambda vectors
 502
 503     returns 1 if a is 'bigger' than b,
 504     returns 0 if they're the same,
 505     returns -1 if a is 'smaller' than b.*/
 506 static int lambda_vec_cmp_native(const lambda_vec_t *a,
 507                                  const lambda_vec_t *b)
 508 {
 509     if (a->lc != b->lc)
 510     {
 511         gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
 512     }
 513     /* if either one has an index we sort based on that */
 514     if ((a->index >= 0) || (b->index >= 0))
 515     {
 516         if (a->index == b->index)
 517         {
 518             return 0;
 519         }
 520         return (a->index > b->index) ? 1 : -1;
 521     }
 522     /* neither has an index, so we can only sort on the lambda components,
 523        which is only valid if there is one component */
 524     if (a->lc->N > 1)
 525     {
 526         gmx_fatal(FARGS,
 527                   "Can't compare lambdas with no index and > 1 component");
 528     }
 529     if (a->dhdl >= 0 || b->dhdl >= 0)
 530     {
 531         gmx_fatal(FARGS,
 532                   "Can't compare native lambdas that are derivatives");
 533     }
 534     if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
 535     {
 536         return 0;
 537     }
 538     return a->val[0] > b->val[0] ? 1 : -1;
 539 }
 540
 541
 542
 543
 544 static void hist_init(hist_t *h, int nhist, int *nbin)
 545 {
 546     int i;
 547     if (nhist > 2)
 548     {
 549         gmx_fatal(FARGS, "histogram with more than two sets of data!");
 550     }
 551     for (i = 0; i < nhist; i++)
 552     {
 553         snew(h->bin[i], nbin[i]);
 554         h->x0[i]      = 0;
 555         h->nbin[i]    = nbin[i];
 556         h->start_time = h->delta_time = 0;
 557         h->dx[i]      = 0;
 558     }
 559     h->sum   = 0;
 560     h->nhist = nhist;
 561 }
 562
 563 static void xvg_init(xvg_t *ba)
 564 {
 565     ba->filename = nullptr;
 566     ba->nset     = 0;
 567     ba->np_alloc = 0;
 568     ba->np       = nullptr;
 569     ba->y        = nullptr;
 570 }
 571
 572 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
 573                          lambda_vec_t *foreign_lambda, double temp,
 574                          gmx_bool derivative, const char *filename)
 575 {
 576     s->native_lambda  = native_lambda;
 577     s->foreign_lambda = foreign_lambda;
 578     s->temp           = temp;
 579     s->derivative     = derivative;
 580
 581     s->ndu        = 0;
 582     s->du         = nullptr;
 583     s->t          = nullptr;
 584     s->start_time = s->delta_time = 0;
 585     s->hist       = nullptr;
 586     s->du_alloc   = nullptr;
 587     s->t_alloc    = nullptr;
 588     s->hist_alloc = nullptr;
 589     s->ndu_alloc  = 0;
 590     s->nt_alloc   = 0;
 591
 592     s->ntot     = 0;
 593     s->filename = filename;
 594 }
 595
 596 static void sample_range_init(sample_range_t *r, samples_t *s)
 597 {
 598     r->start = 0;
 599     r->end   = s->ndu;
 600     r->use   = TRUE;
 601     r->s     = nullptr;
 602 }
 603
 604 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
 605                              lambda_vec_t *foreign_lambda, double temp)
 606 {
 607     sc->native_lambda  = native_lambda;
 608     sc->foreign_lambda = foreign_lambda;
 609     sc->temp           = temp;
 610
 611     sc->nsamples       = 0;
 612     sc->s              = nullptr;
 613     sc->r              = nullptr;
 614     sc->nsamples_alloc = 0;
 615
 616     sc->ntot = 0;
 617     sc->next = sc->prev = nullptr;
 618 }
 619
 620 static void sample_coll_destroy(sample_coll_t *sc)
 621 {
 622     /* don't free the samples themselves */
 623     sfree(sc->r);
 624     sfree(sc->s);
 625 }
 626
 627
 628 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
 629                              double temp)
 630 {
 631     l->lambda = native_lambda;
 632     l->temp   = temp;
 633
 634     l->next = nullptr;
 635     l->prev = nullptr;
 636
 637     l->sc = &(l->sc_head);
 638
 639     sample_coll_init(l->sc, native_lambda, nullptr, 0.);
 640     l->sc->next = l->sc;
 641     l->sc->prev = l->sc;
 642 }
 643
 644 static void barres_init(barres_t *br)
 645 {
 646     br->dg            = 0;
 647     br->dg_err        = 0;
 648     br->sa            = 0;
 649     br->sa_err        = 0;
 650     br->sb            = 0;
 651     br->sb_err        = 0;
 652     br->dg_stddev     = 0;
 653     br->dg_stddev_err = 0;
 654
 655     br->a = nullptr;
 656     br->b = nullptr;
 657 }
 658
 659
 660 /* calculate the total number of samples in a sample collection */
 661 static void sample_coll_calc_ntot(sample_coll_t *sc)
 662 {
 663     int i;
 664
 665     sc->ntot = 0;
 666     for (i = 0; i < sc->nsamples; i++)
 667     {
 668         if (sc->r[i].use)
 669         {
 670             if (sc->s[i]->hist)
 671             {
 672                 sc->ntot += sc->s[i]->ntot;
 673             }
 674             else
 675             {
 676                 sc->ntot += sc->r[i].end - sc->r[i].start;
 677             }
 678         }
 679     }
 680 }
 681
 682
 683 /* find the barsamples_t associated with a lambda that corresponds to
 684    a specific foreign lambda */
 685 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
 686                                                    lambda_vec_t  *foreign_lambda)
 687 {
 688     sample_coll_t *sc = l->sc->next;
 689
 690     while (sc != l->sc)
 691     {
 692         if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
 693         {
 694             return sc;
 695         }
 696         sc = sc->next;
 697     }
 698
 699     return nullptr;
 700 }
 701
 702 /* insert li into an ordered list of lambda_colls */
 703 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
 704 {
 705     sample_coll_t *scn = l->sc->next;
 706     while ( (scn != l->sc) )
 707     {
 708         if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
 709         {
 710             break;
 711         }
 712         scn = scn->next;
 713     }
 714     /* now insert it before the found scn */
 715     sc->next        = scn;
 716     sc->prev        = scn->prev;
 717     scn->prev->next = sc;
 718     scn->prev       = sc;
 719 }
 720
 721 /* insert li into an ordered list of lambdas */
 722 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
 723 {
 724     lambda_data_t *lc = head->next;
 725     while (lc != head)
 726     {
 727         if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
 728         {
 729             break;
 730         }
 731         lc = lc->next;
 732     }
 733     /* now insert ourselves before the found lc */
 734     li->next       = lc;
 735     li->prev       = lc->prev;
 736     lc->prev->next = li;
 737     lc->prev       = li;
 738 }
 739
 740 /* insert a sample and a sample_range into a sample_coll. The
 741     samples are stored as a pointer, the range is copied. */
 742 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
 743                                       sample_range_t *r)
 744 {
 745     /* first check if it belongs here */
 746     GMX_ASSERT(sc->next->s, "Next not properly initialized!");
 747     if (sc->temp != s->temp)
 748     {
 749         gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
 750                   s->filename, sc->next->s[0]->filename);
 751     }
 752     if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
 753     {
 754         gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
 755                   s->filename, sc->next->s[0]->filename);
 756     }
 757     if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
 758     {
 759         gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
 760                   s->filename, sc->next->s[0]->filename);
 761     }
 762
 763     /* check if there's room */
 764     if ( (sc->nsamples + 1) > sc->nsamples_alloc)
 765     {
 766         sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
 767         srenew(sc->s, sc->nsamples_alloc);
 768         srenew(sc->r, sc->nsamples_alloc);
 769     }
 770     sc->s[sc->nsamples] = s;
 771     sc->r[sc->nsamples] = *r;
 772     sc->nsamples++;
 773
 774     sample_coll_calc_ntot(sc);
 775 }
 776
 777 /* insert a sample into a lambda_list, creating the right sample_coll if
 778    neccesary */
 779 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
 780 {
 781     gmx_bool       found = FALSE;
 782     sample_coll_t *sc;
 783     sample_range_t r;
 784
 785     lambda_data_t *l = head->next;
 786
 787     /* first search for the right lambda_data_t */
 788     while (l != head)
 789     {
 790         if (lambda_vec_same(l->lambda, s->native_lambda) )
 791         {
 792             found = TRUE;
 793             break;
 794         }
 795         l = l->next;
 796     }
 797
 798     if (!found)
 799     {
 800         snew(l, 1);                                     /* allocate a new one */
 801         lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
 802         lambda_data_insert_lambda(head, l);             /* add it to the list */
 803     }
 804
 805     /* now look for a sample collection */
 806     sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
 807     if (!sc)
 808     {
 809         snew(sc, 1); /* allocate a new one */
 810         sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
 811         lambda_data_insert_sample_coll(l, sc);
 812     }
 813
 814     /* now insert the samples into the sample coll */
 815     sample_range_init(&r, s);
 816     sample_coll_insert_sample(sc, s, &r);
 817 }
 818
 819
 820 /* make a histogram out of a sample collection */
 821 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
 822                                   int *nbin_alloc, int *nbin,
 823                                   double *dx, double *xmin, int nbin_default)
 824 {
 825     int      i, j, k;
 826     gmx_bool dx_set   = FALSE;
 827     gmx_bool xmin_set = FALSE;
 828
 829     gmx_bool xmax_set      = FALSE;
 830     gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
 831                                        limits of a histogram */
 832     double   xmax          = -1;
 833
 834     /* first determine dx and xmin; try the histograms */
 835     for (i = 0; i < sc->nsamples; i++)
 836     {
 837         if (sc->s[i]->hist)
 838         {
 839             hist_t *hist = sc->s[i]->hist;
 840             for (k = 0; k < hist->nhist; k++)
 841             {
 842                 double hdx      = hist->dx[k];
 843                 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
 844
 845                 /* we use the biggest dx*/
 846                 if ( (!dx_set) || hist->dx[0] > *dx)
 847                 {
 848                     dx_set = TRUE;
 849                     *dx    = hist->dx[0];
 850                 }
 851                 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
 852                 {
 853                     xmin_set = TRUE;
 854                     *xmin    = (hist->x0[k]*hdx);
 855                 }
 856
 857                 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
 858                 {
 859                     xmax_set = TRUE;
 860                     xmax     = xmax_now;
 861                     if (hist->bin[k][hist->nbin[k]-1] != 0)
 862                     {
 863                         xmax_set_hard = TRUE;
 864                     }
 865                 }
 866                 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
 867                 {
 868                     xmax_set_hard = TRUE;
 869                     xmax          = xmax_now;
 870                 }
 871             }
 872         }
 873     }
 874     /* and the delta us */
 875     for (i = 0; i < sc->nsamples; i++)
 876     {
 877         if (sc->s[i]->ndu > 0)
 878         {
 879             /* determine min and max */
 880             int    starti  = sc->r[i].start;
 881             int    endi    = sc->r[i].end;
 882             double du_xmin = sc->s[i]->du[starti];
 883             double du_xmax = sc->s[i]->du[starti];
 884             for (j = starti+1; j < endi; j++)
 885             {
 886                 if (sc->s[i]->du[j] < du_xmin)
 887                 {
 888                     du_xmin = sc->s[i]->du[j];
 889                 }
 890                 if (sc->s[i]->du[j] > du_xmax)
 891                 {
 892                     du_xmax = sc->s[i]->du[j];
 893                 }
 894             }
 895
 896             /* and now change the limits */
 897             if ( (!xmin_set) || (du_xmin < *xmin) )
 898             {
 899                 xmin_set = TRUE;
 900                 *xmin    = du_xmin;
 901             }
 902             if ( (!xmax_set) || ((du_xmax > xmax) &&  !xmax_set_hard) )
 903             {
 904                 xmax_set = TRUE;
 905                 xmax     = du_xmax;
 906             }
 907         }
 908     }
 909
 910     if (!xmax_set || !xmin_set)
 911     {
 912         *nbin = 0;
 913         return;
 914     }
 915
 916
 917     if (!dx_set)
 918     {
 919         *nbin = nbin_default;
 920         *dx   = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
 921                                                be 0, and we count from 0 */
 922     }
 923     else
 924     {
 925         *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
 926     }
 927
 928     if (*nbin > *nbin_alloc)
 929     {
 930         *nbin_alloc = *nbin;
 931         srenew(*bin, *nbin_alloc);
 932     }
 933
 934     /* reset the histogram */
 935     for (i = 0; i < (*nbin); i++)
 936     {
 937         (*bin)[i] = 0;
 938     }
 939
 940     /* now add the actual data */
 941     for (i = 0; i < sc->nsamples; i++)
 942     {
 943         if (sc->s[i]->hist)
 944         {
 945             hist_t *hist = sc->s[i]->hist;
 946             for (k = 0; k < hist->nhist; k++)
 947             {
 948                 double hdx       = hist->dx[k];
 949                 double xmin_hist = hist->x0[k]*hdx;
 950                 for (j = 0; j < hist->nbin[k]; j++)
 951                 {
 952                     /* calculate the bin corresponding to the middle of the
 953                        original bin */
 954                     double x     = hdx*(j+0.5) + xmin_hist;
 955                     int    binnr = static_cast<int>((x-(*xmin))/(*dx));
 956
 957                     if (binnr >= *nbin || binnr < 0)
 958                     {
 959                         binnr = (*nbin)-1;
 960                     }
 961
 962                     (*bin)[binnr] += hist->bin[k][j];
 963                 }
 964             }
 965         }
 966         else
 967         {
 968             int starti = sc->r[i].start;
 969             int endi   = sc->r[i].end;
 970             for (j = starti; j < endi; j++)
 971             {
 972                 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
 973                 if (binnr >= *nbin || binnr < 0)
 974                 {
 975                     binnr = (*nbin)-1;
 976                 }
 977
 978                 (*bin)[binnr]++;
 979             }
 980         }
 981     }
 982 }
 983
 984 /* write a collection of histograms to a file */
 985 static void sim_data_histogram(sim_data_t *sd, const char *filename,
 986                                int nbin_default, const gmx_output_env_t *oenv)
 987 {
 988     char           label_x[STRLEN];
 989     const char    *dhdl    = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
 990     const char    *title   = "N(\\DeltaH)";
 991     const char    *label_y = "Samples";
 992     FILE          *fp;
 993     lambda_data_t *bl;
 994     int            nsets     = 0;
 995     char         **setnames  = nullptr;
 996     gmx_bool       first_set = FALSE;
 997     /* histogram data: */
 998     int           *hist       = nullptr;
 999     int            nbin       = 0;
1000     int            nbin_alloc = 0;
1001     double         dx         = 0;
1002     double         minval     = 0;
1003     int            i;
1004     lambda_data_t *bl_head = sd->lb;
1005
1006     printf("\nWriting histogram to %s\n", filename);
1007     sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1008
1009     fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1010
1011     /* first get all the set names */
1012     bl = bl_head->next;
1013     /* iterate over all lambdas */
1014     while (bl != bl_head)
1015     {
1016         sample_coll_t *sc = bl->sc->next;
1017
1018         /* iterate over all samples */
1019         while (sc != bl->sc)
1020         {
1021             char buf[STRLEN], buf2[STRLEN];
1022
1023             nsets++;
1024             srenew(setnames, nsets);
1025             snew(setnames[nsets-1], STRLEN);
1026             if (sc->foreign_lambda->dhdl < 0)
1027             {
1028                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1029                 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1030                 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1031                         deltag, lambda, buf2, lambda, buf);
1032             }
1033             else
1034             {
1035                 lambda_vec_print(sc->native_lambda, buf, FALSE);
1036                 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1037                         dhdl, lambda, buf);
1038             }
1039             sc = sc->next;
1040         }
1041
1042         bl = bl->next;
1043     }
1044     xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1045
1046
1047     /* now make the histograms */
1048     bl = bl_head->next;
1049     /* iterate over all lambdas */
1050     while (bl != bl_head)
1051     {
1052         sample_coll_t *sc = bl->sc->next;
1053
1054         /* iterate over all samples */
1055         while (sc != bl->sc)
1056         {
1057             if (!first_set)
1058             {
1059                 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1060             }
1061
1062             sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1063                                   nbin_default);
1064
1065             for (i = 0; i < nbin; i++)
1066             {
1067                 double xmin = i*dx + minval;
1068                 double xmax = (i+1)*dx + minval;
1069
1070                 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1071             }
1072
1073             first_set = FALSE;
1074             sc        = sc->next;
1075         }
1076
1077         bl = bl->next;
1078     }
1079
1080     if (hist)
1081     {
1082         sfree(hist);
1083     }
1084
1085     xvgrclose(fp);
1086 }
1087
1088 static int
1089 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1090 {
1091     int n = 0;
1092
1093     n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1094     if (lambda->index >= 0)
1095     {
1096         n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1097     }
1098     if (lambda->dhdl >= 0)
1099     {
1100         n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1101     }
1102     else
1103     {
1104         int i;
1105         for (i = 0; i < lambda->lc->N; i++)
1106         {
1107             n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1108         }
1109     }
1110     return n;
1111 }
1112
1113 /* create a collection (array) of barres_t object given a ordered linked list
1114    of barlamda_t sample collections */
1115 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1116                                     gmx_bool use_dhdl)
1117 {
1118     lambda_data_t *bl;
1119     int            nlambda = 0;
1120     barres_t      *res;
1121     gmx_bool       dhdl    = FALSE;
1122     gmx_bool       first   = TRUE;
1123     lambda_data_t *bl_head = sd->lb;
1124
1125     /* first count the lambdas */
1126     bl = bl_head->next;
1127     while (bl != bl_head)
1128     {
1129         nlambda++;
1130         bl = bl->next;
1131     }
1132     snew(res, nlambda-1);
1133
1134     /* next put the right samples in the res */
1135     *nres = 0;
1136     bl    = bl_head->next->next; /* we start with the second one. */
1137     while (bl != bl_head)
1138     {
1139         sample_coll_t *sc, *scprev;
1140         barres_t      *br = &(res[*nres]);
1141         /* there is always a previous one. we search for that as a foreign
1142            lambda: */
1143         scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1144         sc     = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1145
1146         barres_init(br);
1147
1148         if (use_dhdl)
1149         {
1150             /* we use dhdl */
1151
1152             scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1153             sc     = lambda_data_find_sample_coll(bl, bl->lambda);
1154
1155             if (first)
1156             {
1157                 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1158                 dhdl = TRUE;
1159             }
1160             if (!dhdl)
1161             {
1162                 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1163             }
1164         }
1165         else if (!scprev && !sc)
1166         {
1167             char descX[STRLEN], descY[STRLEN];
1168             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1169             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1170
1171             gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1172         }
1173
1174         /* normal delta H */
1175         if (!scprev)
1176         {
1177             char descX[STRLEN], descY[STRLEN];
1178             snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1179             snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1180             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1181         }
1182         if (!sc)
1183         {
1184             char descX[STRLEN], descY[STRLEN];
1185             snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1186             snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1187             gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1188         }
1189         br->a = scprev;
1190         br->b = sc;
1191
1192         first = FALSE;
1193         (*nres)++;
1194         bl = bl->next;
1195     }
1196     return res;
1197 }
1198
1199 /* estimate the maximum discretization error */
1200 static double barres_list_max_disc_err(barres_t *res, int nres)
1201 {
1202     int    i, j;
1203     double disc_err = 0.;
1204     double delta_lambda;
1205
1206     for (i = 0; i < nres; i++)
1207     {
1208         barres_t *br = &(res[i]);
1209
1210         delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1211                                            br->a->native_lambda);
1212
1213         for (j = 0; j < br->a->nsamples; j++)
1214         {
1215             if (br->a->s[j]->hist)
1216             {
1217                 double Wfac = 1.;
1218                 if (br->a->s[j]->derivative)
1219                 {
1220                     Wfac =  delta_lambda;
1221                 }
1222
1223                 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1224             }
1225         }
1226         for (j = 0; j < br->b->nsamples; j++)
1227         {
1228             if (br->b->s[j]->hist)
1229             {
1230                 double Wfac = 1.;
1231                 if (br->b->s[j]->derivative)
1232                 {
1233                     Wfac =  delta_lambda;
1234                 }
1235                 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1236             }
1237         }
1238     }
1239     return disc_err;
1240 }
1241
1242
1243 /* impose start and end times on a sample collection, updating sample_ranges */
1244 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1245                                      double end_t)
1246 {
1247     int i;
1248     for (i = 0; i < sc->nsamples; i++)
1249     {
1250         samples_t      *s = sc->s[i];
1251         sample_range_t *r = &(sc->r[i]);
1252         if (s->hist)
1253         {
1254             double end_time = s->hist->delta_time*s->hist->sum +
1255                 s->hist->start_time;
1256             if (s->hist->start_time < begin_t || end_time > end_t)
1257             {
1258                 r->use = FALSE;
1259             }
1260         }
1261         else
1262         {
1263             if (!s->t)
1264             {
1265                 double end_time;
1266                 if (s->start_time < begin_t)
1267                 {
1268                     r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1269                 }
1270                 end_time = s->delta_time*s->ndu + s->start_time;
1271                 if (end_time > end_t)
1272                 {
1273                     r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1274                 }
1275             }
1276             else
1277             {
1278                 int j;
1279                 for (j = 0; j < s->ndu; j++)
1280                 {
1281                     if (s->t[j] < begin_t)
1282                     {
1283                         r->start = j;
1284                     }
1285
1286                     if (s->t[j] >= end_t)
1287                     {
1288                         r->end = j;
1289                         break;
1290                     }
1291                 }
1292             }
1293             if (r->start > r->end)
1294             {
1295                 r->use = FALSE;
1296             }
1297         }
1298     }
1299     sample_coll_calc_ntot(sc);
1300 }
1301
1302 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1303 {
1304     double         first_t, last_t;
1305     double         begin_t, end_t;
1306     lambda_data_t *lc;
1307     lambda_data_t *head = sd->lb;
1308     int            j;
1309
1310     if (begin <= 0 && end < 0)
1311     {
1312         return;
1313     }
1314
1315     /* first determine the global start and end times */
1316     first_t = -1;
1317     last_t  = -1;
1318     lc      = head->next;
1319     while (lc != head)
1320     {
1321         sample_coll_t *sc = lc->sc->next;
1322         while (sc != lc->sc)
1323         {
1324             for (j = 0; j < sc->nsamples; j++)
1325             {
1326                 double start_t, end_t;
1327
1328                 start_t = sc->s[j]->start_time;
1329                 end_t   =   sc->s[j]->start_time;
1330                 if (sc->s[j]->hist)
1331                 {
1332                     end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1333                 }
1334                 else
1335                 {
1336                     if (sc->s[j]->t)
1337                     {
1338                         end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1339                     }
1340                     else
1341                     {
1342                         end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1343                     }
1344                 }
1345
1346                 if (start_t < first_t || first_t < 0)
1347                 {
1348                     first_t = start_t;
1349                 }
1350                 if (end_t > last_t)
1351                 {
1352                     last_t = end_t;
1353                 }
1354             }
1355             sc = sc->next;
1356         }
1357         lc = lc->next;
1358     }
1359
1360     /* calculate the actual times */
1361     if (begin > 0)
1362     {
1363         begin_t = begin;
1364     }
1365     else
1366     {
1367         begin_t = first_t;
1368     }
1369
1370     if (end > 0)
1371     {
1372         end_t = end;
1373     }
1374     else
1375     {
1376         end_t = last_t;
1377     }
1378     printf("\n   Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1379
1380     if (begin_t > end_t)
1381     {
1382         return;
1383     }
1384     printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1385
1386     /* then impose them */
1387     lc = head->next;
1388     while (lc != head)
1389     {
1390         sample_coll_t *sc = lc->sc->next;
1391         while (sc != lc->sc)
1392         {
1393             sample_coll_impose_times(sc, begin_t, end_t);
1394             sc = sc->next;
1395         }
1396         lc = lc->next;
1397     }
1398 }
1399
1400
1401 /* create subsample i out of ni from an existing sample_coll */
1402 static gmx_bool sample_coll_create_subsample(sample_coll_t  *sc,
1403                                              sample_coll_t *sc_orig,
1404                                              int i, int ni)
1405 {
1406     int             j;
1407
1408     gmx_int64_t     ntot_start;
1409     gmx_int64_t     ntot_end;
1410     gmx_int64_t     ntot_so_far;
1411
1412     *sc = *sc_orig; /* just copy all fields */
1413
1414     /* allocate proprietary memory */
1415     snew(sc->s, sc_orig->nsamples);
1416     snew(sc->r, sc_orig->nsamples);
1417
1418     /* copy the samples */
1419     for (j = 0; j < sc_orig->nsamples; j++)
1420     {
1421         sc->s[j] = sc_orig->s[j];
1422         sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1423     }
1424
1425     /* now fix start and end fields */
1426     /* the casts avoid possible overflows */
1427     ntot_start  = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1428     ntot_end    = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1429     ntot_so_far = 0;
1430     for (j = 0; j < sc->nsamples; j++)
1431     {
1432         gmx_int64_t ntot_add;
1433         gmx_int64_t new_start, new_end;
1434
1435         if (sc->r[j].use)
1436         {
1437             if (sc->s[j]->hist)
1438             {
1439                 ntot_add = sc->s[j]->hist->sum;
1440             }
1441             else
1442             {
1443                 ntot_add = sc->r[j].end - sc->r[j].start;
1444             }
1445         }
1446         else
1447         {
1448             ntot_add = 0;
1449         }
1450
1451         if (!sc->s[j]->hist)
1452         {
1453             if (ntot_so_far < ntot_start)
1454             {
1455                 /* adjust starting point */
1456                 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1457             }
1458             else
1459             {
1460                 new_start = sc->r[j].start;
1461             }
1462             /* adjust end point */
1463             new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1464             if (new_end > sc->r[j].end)
1465             {
1466                 new_end = sc->r[j].end;
1467             }
1468
1469             /* check if we're in range at all */
1470             if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1471             {
1472                 new_start = 0;
1473                 new_end   = 0;
1474             }
1475             /* and write the new range */
1476             GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1477             GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1478             sc->r[j].start = static_cast<int>(new_start);
1479             sc->r[j].end   = static_cast<int>(new_end);
1480         }
1481         else
1482         {
1483             if (sc->r[j].use)
1484             {
1485                 double overlap;
1486                 double ntot_start_norm, ntot_end_norm;
1487                 /* calculate the amount of overlap of the
1488                    desired range (ntot_start -- ntot_end) onto
1489                    the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1490
1491                 /* first calculate normalized bounds
1492                    (where 0 is the start of the hist range, and 1 the end) */
1493                 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1494                 ntot_end_norm   = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1495
1496                 /* now fix the boundaries */
1497                 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1498                 ntot_end_norm   = std::max(0.0, std::min(1.0, ntot_end_norm));
1499
1500                 /* and calculate the overlap */
1501                 overlap = ntot_end_norm - ntot_start_norm;
1502
1503                 if (overlap > 0.95) /* we allow for 5% slack */
1504                 {
1505                     sc->r[j].use = TRUE;
1506                 }
1507                 else if (overlap < 0.05)
1508                 {
1509                     sc->r[j].use = FALSE;
1510                 }
1511                 else
1512                 {
1513                     return FALSE;
1514                 }
1515             }
1516         }
1517         ntot_so_far += ntot_add;
1518     }
1519     sample_coll_calc_ntot(sc);
1520
1521     return TRUE;
1522 }
1523
1524 /* calculate minimum and maximum work values in sample collection */
1525 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1526                                 double *Wmin, double *Wmax)
1527 {
1528     int i, j;
1529
1530     *Wmin =  std::numeric_limits<float>::max();
1531     *Wmax = -std::numeric_limits<float>::max();
1532
1533     for (i = 0; i < sc->nsamples; i++)
1534     {
1535         samples_t      *s = sc->s[i];
1536         sample_range_t *r = &(sc->r[i]);
1537         if (r->use)
1538         {
1539             if (!s->hist)
1540             {
1541                 for (j = r->start; j < r->end; j++)
1542                 {
1543                     *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1544                     *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1545                 }
1546             }
1547             else
1548             {
1549                 int    hd = 0; /* determine the histogram direction: */
1550                 double dx;
1551                 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1552                 {
1553                     hd = 1;
1554                 }
1555                 dx = s->hist->dx[hd];
1556
1557                 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1558                 {
1559                     *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1560                     *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1561                     /* look for the highest value bin with values */
1562                     if (s->hist->bin[hd][j] > 0)
1563                     {
1564                         *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1565                         *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1566                         break;
1567                     }
1568                 }
1569             }
1570         }
1571     }
1572 }
1573
1574 /* Initialize a sim_data structure */
1575 static void sim_data_init(sim_data_t *sd)
1576 {
1577     /* make linked list */
1578     sd->lb       = &(sd->lb_head);
1579     sd->lb->next = sd->lb;
1580     sd->lb->prev = sd->lb;
1581
1582     lambda_components_init(&(sd->lc));
1583 }
1584
1585
1586 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1587 {
1588     int    i;
1589     double sum;
1590
1591     sum = 0;
1592
1593     for (i = 0; i < n; i++)
1594     {
1595         sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1596     }
1597
1598     return sum;
1599 }
1600
1601 /* calculate the BAR average given a histogram
1602
1603     if type== 0, calculate the best estimate for the average,
1604     if type==-1, calculate the minimum possible value given the histogram
1605     if type== 1, calculate the maximum possible value given the histogram */
1606 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1607                                 int type)
1608 {
1609     double sum = 0.;
1610     int    i;
1611     int    maxbin;
1612     /* normalization factor multiplied with bin width and
1613        number of samples (we normalize through M): */
1614     double normdx = 1.;
1615     int    hd     = 0; /* determine the histogram direction: */
1616     double dx;
1617
1618     if ( (hist->nhist > 1) && (Wfac < 0) )
1619     {
1620         hd = 1;
1621     }
1622     dx     = hist->dx[hd];
1623     maxbin = hist->nbin[hd]-1;
1624     if (type == 1)
1625     {
1626         maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1627     }
1628
1629     for (i = 0; i < maxbin; i++)
1630     {
1631         double x    = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1632         double pxdx = hist->bin[0][i]*normdx;         /* p(x)dx */
1633
1634         sum += pxdx/(1. + std::exp(x + sbMmDG));
1635     }
1636
1637     return sum;
1638 }
1639
1640 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1641                                 double temp, double tol, int type)
1642 {
1643     double kT, beta, M;
1644     int    i;
1645     double Wfac1, Wfac2, Wmin, Wmax;
1646     double DG0, DG1, DG2, dDG1;
1647     double n1, n2; /* numbers of samples as doubles */
1648
1649     kT   = BOLTZ*temp;
1650     beta = 1/kT;
1651
1652     /* count the numbers of samples */
1653     n1 = ca->ntot;
1654     n2 = cb->ntot;
1655
1656     M = std::log(n1/n2);
1657
1658     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1659     if (ca->foreign_lambda->dhdl < 0)
1660     {
1661         /* this is the case when the delta U were calculated directly
1662            (i.e. we're not scaling dhdl) */
1663         Wfac1 = beta;
1664         Wfac2 = beta;
1665     }
1666     else
1667     {
1668         /* we're using dhdl, so delta_lambda needs to be a
1669            multiplication factor.  */
1670         /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1671         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1672                                                   ca->native_lambda);
1673         if (cb->native_lambda->lc->N > 1)
1674         {
1675             gmx_fatal(FARGS,
1676                       "Can't (yet) do multi-component dhdl interpolation");
1677         }
1678
1679         Wfac1 =  beta*delta_lambda;
1680         Wfac2 = -beta*delta_lambda;
1681     }
1682
1683     if (beta < 1)
1684     {
1685         /* We print the output both in kT and kJ/mol.
1686          * Here we determine DG in kT, so when beta < 1
1687          * the precision has to be increased.
1688          */
1689         tol *= beta;
1690     }
1691
1692     /* Calculate minimum and maximum work to give an initial estimate of
1693      * delta G  as their average.
1694      */
1695     {
1696         double Wmin1, Wmin2, Wmax1, Wmax2;
1697         sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1698         sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1699
1700         Wmin = std::min(Wmin1, Wmin2);
1701         Wmax = std::max(Wmax1, Wmax2);
1702     }
1703
1704     DG0 = Wmin;
1705     DG2 = Wmax;
1706
1707     if (debug)
1708     {
1709         fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1710     }
1711     /* We approximate by bisection: given our initial estimates
1712        we keep checking whether the halfway point is greater or
1713        smaller than what we get out of the BAR averages.
1714
1715        For the comparison we can use twice the tolerance. */
1716     while (DG2 - DG0 > 2*tol)
1717     {
1718         DG1 = 0.5*(DG0 + DG2);
1719
1720         /* calculate the BAR averages */
1721         dDG1 = 0.;
1722
1723         for (i = 0; i < ca->nsamples; i++)
1724         {
1725             samples_t      *s = ca->s[i];
1726             sample_range_t *r = &(ca->r[i]);
1727             if (r->use)
1728             {
1729                 if (s->hist)
1730                 {
1731                     dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1732                 }
1733                 else
1734                 {
1735                     dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1736                                          Wfac1, (M-DG1));
1737                 }
1738             }
1739         }
1740         for (i = 0; i < cb->nsamples; i++)
1741         {
1742             samples_t      *s = cb->s[i];
1743             sample_range_t *r = &(cb->r[i]);
1744             if (r->use)
1745             {
1746                 if (s->hist)
1747                 {
1748                     dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1749                 }
1750                 else
1751                 {
1752                     dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1753                                          Wfac2, -(M-DG1));
1754                 }
1755             }
1756         }
1757
1758         if (dDG1 < 0)
1759         {
1760             DG0 = DG1;
1761         }
1762         else
1763         {
1764             DG2 = DG1;
1765         }
1766         if (debug)
1767         {
1768             fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1769         }
1770     }
1771
1772     return 0.5*(DG0 + DG2);
1773 }
1774
1775 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1776                              double temp, double dg, double *sa, double *sb)
1777 {
1778     int    i, j;
1779     double W_ab = 0.;
1780     double W_ba = 0.;
1781     double kT, beta;
1782     double Wfac1, Wfac2;
1783     double n1, n2;
1784
1785     kT   = BOLTZ*temp;
1786     beta = 1/kT;
1787
1788     /* count the numbers of samples */
1789     n1 = ca->ntot;
1790     n2 = cb->ntot;
1791
1792     /* to ensure the work values are the same as during the delta_G */
1793     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1794     if (ca->foreign_lambda->dhdl < 0)
1795     {
1796         /* this is the case when the delta U were calculated directly
1797            (i.e. we're not scaling dhdl) */
1798         Wfac1 = beta;
1799         Wfac2 = beta;
1800     }
1801     else
1802     {
1803         /* we're using dhdl, so delta_lambda needs to be a
1804            multiplication factor.  */
1805         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1806                                                   ca->native_lambda);
1807         Wfac1 =  beta*delta_lambda;
1808         Wfac2 = -beta*delta_lambda;
1809     }
1810
1811     /* first calculate the average work in both directions */
1812     for (i = 0; i < ca->nsamples; i++)
1813     {
1814         samples_t      *s = ca->s[i];
1815         sample_range_t *r = &(ca->r[i]);
1816         if (r->use)
1817         {
1818             if (!s->hist)
1819             {
1820                 for (j = r->start; j < r->end; j++)
1821                 {
1822                     W_ab += Wfac1*s->du[j];
1823                 }
1824             }
1825             else
1826             {
1827                 /* normalization factor multiplied with bin width and
1828                    number of samples (we normalize through M): */
1829                 double normdx = 1.;
1830                 double dx;
1831                 int    hd = 0; /* histogram direction */
1832                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1833                 {
1834                     hd = 1;
1835                 }
1836                 dx = s->hist->dx[hd];
1837
1838                 for (j = 0; j < s->hist->nbin[0]; j++)
1839                 {
1840                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1841                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1842                     W_ab += pxdx*x;
1843                 }
1844             }
1845         }
1846     }
1847     W_ab /= n1;
1848
1849     for (i = 0; i < cb->nsamples; i++)
1850     {
1851         samples_t      *s = cb->s[i];
1852         sample_range_t *r = &(cb->r[i]);
1853         if (r->use)
1854         {
1855             if (!s->hist)
1856             {
1857                 for (j = r->start; j < r->end; j++)
1858                 {
1859                     W_ba += Wfac1*s->du[j];
1860                 }
1861             }
1862             else
1863             {
1864                 /* normalization factor multiplied with bin width and
1865                    number of samples (we normalize through M): */
1866                 double normdx = 1.;
1867                 double dx;
1868                 int    hd = 0; /* histogram direction */
1869                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1870                 {
1871                     hd = 1;
1872                 }
1873                 dx = s->hist->dx[hd];
1874
1875                 for (j = 0; j < s->hist->nbin[0]; j++)
1876                 {
1877                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1878                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1879                     W_ba += pxdx*x;
1880                 }
1881             }
1882         }
1883     }
1884     W_ba /= n2;
1885
1886     /* then calculate the relative entropies */
1887     *sa = (W_ab - dg);
1888     *sb = (W_ba + dg);
1889 }
1890
1891 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1892                            double temp, double dg, double *stddev)
1893 {
1894     int    i, j;
1895     double M;
1896     double sigmafact = 0.;
1897     double kT, beta;
1898     double Wfac1, Wfac2;
1899     double n1, n2;
1900
1901     kT   = BOLTZ*temp;
1902     beta = 1/kT;
1903
1904     /* count the numbers of samples */
1905     n1 = ca->ntot;
1906     n2 = cb->ntot;
1907
1908     /* to ensure the work values are the same as during the delta_G */
1909     /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1910     if (ca->foreign_lambda->dhdl < 0)
1911     {
1912         /* this is the case when the delta U were calculated directly
1913            (i.e. we're not scaling dhdl) */
1914         Wfac1 = beta;
1915         Wfac2 = beta;
1916     }
1917     else
1918     {
1919         /* we're using dhdl, so delta_lambda needs to be a
1920            multiplication factor.  */
1921         double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1922                                                   ca->native_lambda);
1923         Wfac1 =  beta*delta_lambda;
1924         Wfac2 = -beta*delta_lambda;
1925     }
1926
1927     M = std::log(n1/n2);
1928
1929
1930     /* calculate average in both directions */
1931     for (i = 0; i < ca->nsamples; i++)
1932     {
1933         samples_t      *s = ca->s[i];
1934         sample_range_t *r = &(ca->r[i]);
1935         if (r->use)
1936         {
1937             if (!s->hist)
1938             {
1939                 for (j = r->start; j < r->end; j++)
1940                 {
1941                     sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1942                 }
1943             }
1944             else
1945             {
1946                 /* normalization factor multiplied with bin width and
1947                    number of samples (we normalize through M): */
1948                 double normdx = 1.;
1949                 double dx;
1950                 int    hd = 0; /* histogram direction */
1951                 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1952                 {
1953                     hd = 1;
1954                 }
1955                 dx = s->hist->dx[hd];
1956
1957                 for (j = 0; j < s->hist->nbin[0]; j++)
1958                 {
1959                     double x    = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1960                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1961
1962                     sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1963                 }
1964             }
1965         }
1966     }
1967     for (i = 0; i < cb->nsamples; i++)
1968     {
1969         samples_t      *s = cb->s[i];
1970         sample_range_t *r = &(cb->r[i]);
1971         if (r->use)
1972         {
1973             if (!s->hist)
1974             {
1975                 for (j = r->start; j < r->end; j++)
1976                 {
1977                     sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1978                 }
1979             }
1980             else
1981             {
1982                 /* normalization factor multiplied with bin width and
1983                    number of samples (we normalize through M): */
1984                 double normdx = 1.;
1985                 double dx;
1986                 int    hd = 0; /* histogram direction */
1987                 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1988                 {
1989                     hd = 1;
1990                 }
1991                 dx = s->hist->dx[hd];
1992
1993                 for (j = 0; j < s->hist->nbin[0]; j++)
1994                 {
1995                     double x    = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1996                     double pxdx = s->hist->bin[0][j]*normdx;         /* p(x)dx */
1997
1998                     sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
1999                 }
2000             }
2001         }
2002     }
2003
2004     sigmafact /= (n1 + n2);
2005
2006
2007     /* Eq. 10 from
2008        Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2009     *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2010 }
2011
2012
2013
2014 static void calc_bar(barres_t *br, double tol,
2015                      int npee_min, int npee_max, gmx_bool *bEE,
2016                      double *partsum)
2017 {
2018     int      npee, p;
2019     double   dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2020                                                         for calculated quantities */
2021     double   temp = br->a->temp;
2022     int      i;
2023     double   dg_min, dg_max;
2024     gmx_bool have_hist = FALSE;
2025
2026     br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2027
2028     br->dg_disc_err      = 0.;
2029     br->dg_histrange_err = 0.;
2030
2031     /* check if there are histograms */
2032     for (i = 0; i < br->a->nsamples; i++)
2033     {
2034         if (br->a->r[i].use && br->a->s[i]->hist)
2035         {
2036             have_hist = TRUE;
2037             break;
2038         }
2039     }
2040     if (!have_hist)
2041     {
2042         for (i = 0; i < br->b->nsamples; i++)
2043         {
2044             if (br->b->r[i].use && br->b->s[i]->hist)
2045             {
2046                 have_hist = TRUE;
2047                 break;
2048             }
2049         }
2050     }
2051
2052     /* calculate histogram-specific errors */
2053     if (have_hist)
2054     {
2055         dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2056         dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2057
2058         if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2059         {
2060             /* the histogram range  error is the biggest of the differences
2061                between the best estimate and the extremes */
2062             br->dg_histrange_err = std::abs(dg_max - dg_min);
2063         }
2064         br->dg_disc_err = 0.;
2065         for (i = 0; i < br->a->nsamples; i++)
2066         {
2067             if (br->a->s[i]->hist)
2068             {
2069                 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2070             }
2071         }
2072         for (i = 0; i < br->b->nsamples; i++)
2073         {
2074             if (br->b->s[i]->hist)
2075             {
2076                 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2077             }
2078         }
2079     }
2080     calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2081
2082     calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2083
2084     dg_sig2     = 0;
2085     sa_sig2     = 0;
2086     sb_sig2     = 0;
2087     stddev_sig2 = 0;
2088
2089     *bEE = TRUE;
2090     {
2091         sample_coll_t ca, cb;
2092
2093         /* initialize the samples */
2094         sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2095                          br->a->temp);
2096         sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2097                          br->b->temp);
2098
2099         for (npee = npee_min; npee <= npee_max; npee++)
2100         {
2101             double dgs      = 0;
2102             double dgs2     = 0;
2103             double dsa      = 0;
2104             double dsb      = 0;
2105             double dsa2     = 0;
2106             double dsb2     = 0;
2107             double dstddev  = 0;
2108             double dstddev2 = 0;
2109
2110
2111             for (p = 0; p < npee; p++)
2112             {
2113                 double   dgp;
2114                 double   stddevc;
2115                 double   sac, sbc;
2116                 gmx_bool cac, cbc;
2117
2118                 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2119                 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2120
2121                 if (!cac || !cbc)
2122                 {
2123                     printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2124                     *bEE = FALSE;
2125                     if (cac)
2126                     {
2127                         sample_coll_destroy(&ca);
2128                     }
2129                     if (cbc)
2130                     {
2131                         sample_coll_destroy(&cb);
2132                     }
2133                     return;
2134                 }
2135
2136                 dgp   = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2137                 dgs  += dgp;
2138                 dgs2 += dgp*dgp;
2139
2140                 partsum[npee*(npee_max+1)+p] += dgp;
2141
2142                 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2143                 dsa  += sac;
2144                 dsa2 += sac*sac;
2145                 dsb  += sbc;
2146                 dsb2 += sbc*sbc;
2147                 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2148
2149                 dstddev  += stddevc;
2150                 dstddev2 += stddevc*stddevc;
2151
2152                 sample_coll_destroy(&ca);
2153                 sample_coll_destroy(&cb);
2154             }
2155             dgs     /= npee;
2156             dgs2    /= npee;
2157             dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2158
2159             dsa     /= npee;
2160             dsa2    /= npee;
2161             dsb     /= npee;
2162             dsb2    /= npee;
2163             sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2164             sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2165
2166             dstddev     /= npee;
2167             dstddev2    /= npee;
2168             stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2169         }
2170         br->dg_err        = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2171         br->sa_err        = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2172         br->sb_err        = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2173         br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2174     }
2175 }
2176
2177
2178 static double bar_err(int nbmin, int nbmax, const double *partsum)
2179 {
2180     int    nb, b;
2181     double svar, s, s2, dg;
2182
2183     svar = 0;
2184     for (nb = nbmin; nb <= nbmax; nb++)
2185     {
2186         s  = 0;
2187         s2 = 0;
2188         for (b = 0; b < nb; b++)
2189         {
2190             dg  = partsum[nb*(nbmax+1)+b];
2191             s  += dg;
2192             s2 += dg*dg;
2193         }
2194         s    /= nb;
2195         s2   /= nb;
2196         svar += (s2 - s*s)/(nb - 1);
2197     }
2198
2199     return std::sqrt(svar/(nbmax + 1 - nbmin));
2200 }
2201
2202
2203 /* Seek the end of an identifier (consecutive non-spaces), followed by
2204    an optional number of spaces or '='-signs. Returns a pointer to the
2205    first non-space value found after that. Returns NULL if the string
2206    ends before that.
2207  */
2208 static const char *find_value(const char *str)
2209 {
2210     gmx_bool name_end_found = FALSE;
2211
2212     /* if the string is a NULL pointer, return a NULL pointer. */
2213     if (str == nullptr)
2214     {
2215         return nullptr;
2216     }
2217     while (*str != '\0')
2218     {
2219         /* first find the end of the name */
2220         if (!name_end_found)
2221         {
2222             if (std::isspace(*str) || (*str == '=') )
2223             {
2224                 name_end_found = TRUE;
2225             }
2226         }
2227         else
2228         {
2229             if (!( std::isspace(*str) || (*str == '=') ))
2230             {
2231                 return str;
2232             }
2233         }
2234         str++;
2235     }
2236     return nullptr;
2237 }
2238
2239
2240 /* read a vector-notation description of a lambda vector */
2241 static gmx_bool read_lambda_compvec(const char                *str,
2242                                     lambda_vec_t              *lv,
2243                                     const lambda_components_t *lc_in,
2244                                     lambda_components_t       *lc_out,
2245                                     const char               **end,
2246                                     const char                *fn)
2247 {
2248     gmx_bool    initialize_lc = FALSE;   /* whether to initialize the lambda
2249                                             components, or to check them */
2250     gmx_bool    start_reached = FALSE;   /* whether the start of component names
2251                                             has been reached */
2252     gmx_bool    vector        = FALSE;   /* whether there are multiple components */
2253     int         n             = 0;       /* current component number */
2254     const char *val_start     = nullptr; /* start of the component name, or NULL
2255                                             if not in a value */
2256     char       *strtod_end;
2257     gmx_bool    OK = TRUE;
2258
2259     if (end)
2260     {
2261         *end = str;
2262     }
2263
2264
2265     if (lc_out && lc_out->N == 0)
2266     {
2267         initialize_lc = TRUE;
2268     }
2269
2270     if (lc_in == nullptr)
2271     {
2272         lc_in = lc_out;
2273     }
2274
2275     while (1)
2276     {
2277         if (!start_reached)
2278         {
2279             if (std::isalnum(*str))
2280             {
2281                 vector        = FALSE;
2282                 start_reached = TRUE;
2283                 val_start     = str;
2284             }
2285             else if (*str == '(')
2286             {
2287                 vector        = TRUE;
2288                 start_reached = TRUE;
2289             }
2290             else if (!std::isspace(*str))
2291             {
2292                 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2293             }
2294         }
2295         else
2296         {
2297             if (val_start)
2298             {
2299                 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2300                 {
2301                     /* end of value */
2302                     if (lv == nullptr)
2303                     {
2304                         if (initialize_lc)
2305                         {
2306                             lambda_components_add(lc_out, val_start,
2307                                                   (str-val_start));
2308                         }
2309                         else
2310                         {
2311                             if (!lambda_components_check(lc_out, n, val_start,
2312                                                          (str-val_start)))
2313                             {
2314                                 return FALSE;
2315                             }
2316                         }
2317                     }
2318                     else
2319                     {
2320                         /* add a vector component to lv */
2321                         lv->val[n] = strtod(val_start, &strtod_end);
2322                         if (val_start == strtod_end)
2323                         {
2324                             gmx_fatal(FARGS,
2325                                       "Error reading lambda vector in %s", fn);
2326                         }
2327                     }
2328                     /* reset for the next identifier */
2329                     val_start = nullptr;
2330                     n++;
2331                     if (!vector)
2332                     {
2333                         return OK;
2334                     }
2335                 }
2336             }
2337             else if (std::isalnum(*str))
2338             {
2339                 val_start = str;
2340             }
2341             if (*str == ')')
2342             {
2343                 str++;
2344                 if (end)
2345                 {
2346                     *end = str;
2347                 }
2348                 if (!vector)
2349                 {
2350                     gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2351                 }
2352                 else
2353                 {
2354                     GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2355                     if (n == lc_in->N)
2356                     {
2357                         return OK;
2358                     }
2359                     else if (lv == nullptr)
2360                     {
2361                         return FALSE;
2362                     }
2363                     else
2364                     {
2365                         gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2366                                   fn);
2367                         return FALSE;
2368                     }
2369
2370                 }
2371             }
2372         }
2373         if (*str == '\0')
2374         {
2375             break;
2376         }
2377         str++;
2378         if (end)
2379         {
2380             *end = str;
2381         }
2382     }
2383     if (vector)
2384     {
2385         gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2386         return FALSE;
2387     }
2388     return OK;
2389 }
2390
2391 /* read and check the component names from a string */
2392 static gmx_bool read_lambda_components(const char          *str,
2393                                        lambda_components_t *lc,
2394                                        const char         **end,
2395                                        const char          *fn)
2396 {
2397     return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2398 }
2399
2400 /* read an initialized lambda vector from a string */
2401 static gmx_bool read_lambda_vector(const char   *str,
2402                                    lambda_vec_t *lv,
2403                                    const char  **end,
2404                                    const char   *fn)
2405 {
2406     return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2407 }
2408
2409
2410
2411 /* deduce lambda value from legend.
2412     fn = the file name
2413     legend = the legend string
2414     ba = the xvg data
2415     lam = the initialized lambda vector
2416    returns whether to use the data in this set.
2417  */
2418 static gmx_bool legend2lambda(const char   *fn,
2419                               const char   *legend,
2420                               lambda_vec_t *lam)
2421 {
2422     const char   *ptr    = nullptr, *ptr2 = nullptr;
2423     gmx_bool      ok     = FALSE;
2424     gmx_bool      bdhdl  = FALSE;
2425     const char   *tostr  = " to ";
2426
2427     if (legend == nullptr)
2428     {
2429         gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2430     }
2431
2432     /* look for the last 'to': */
2433     ptr2 = legend;
2434     do
2435     {
2436         ptr2 = std::strstr(ptr2, tostr);
2437         if (ptr2 != nullptr)
2438         {
2439             ptr = ptr2;
2440             ptr2++;
2441         }
2442     }
2443     while (ptr2 != nullptr && *ptr2 != '\0');
2444
2445     if (ptr)
2446     {
2447         ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2448     }
2449     else
2450     {
2451         /* look for the = sign */
2452         ptr = std::strrchr(legend, '=');
2453         if (!ptr)
2454         {
2455             /* otherwise look for the last space */
2456             ptr = std::strrchr(legend, ' ');
2457         }
2458     }
2459
2460     if (std::strstr(legend, "dH"))
2461     {
2462         ok    = TRUE;
2463         bdhdl = TRUE;
2464     }
2465     else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2466     {
2467         ok    = TRUE;
2468         bdhdl = FALSE;
2469     }
2470     else /*if (std::strstr(legend, "pV"))*/
2471     {
2472         return FALSE;
2473     }
2474     if (!ptr)
2475     {
2476         ok = FALSE;
2477     }
2478
2479     if (!ok)
2480     {
2481         gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2482     }
2483     if (!bdhdl)
2484     {
2485         ptr = find_value(ptr);
2486         if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2487         {
2488             gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2489         }
2490     }
2491     else
2492     {
2493         int         dhdl_index;
2494         const char *end;
2495
2496         ptr = std::strrchr(legend, '=');
2497         end = ptr;
2498         if (ptr)
2499         {
2500             /* there must be a component name */
2501             ptr--;
2502             if (ptr < legend)
2503             {
2504                 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2505             }
2506             /* now backtrack to the start of the identifier */
2507             while (isspace(*ptr))
2508             {
2509                 end = ptr;
2510                 ptr--;
2511                 if (ptr < legend)
2512                 {
2513                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2514                 }
2515             }
2516             while (!std::isspace(*ptr))
2517             {
2518                 ptr--;
2519                 if (ptr < legend)
2520                 {
2521                     gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2522                 }
2523             }
2524             ptr++;
2525             dhdl_index     = lambda_components_find(lam->lc, ptr, (end-ptr));
2526             if (dhdl_index < 0)
2527             {
2528                 char buf[STRLEN];
2529                 std::strncpy(buf, ptr, (end-ptr));
2530                 buf[(end-ptr)] = '\0';
2531                 gmx_fatal(FARGS,
2532                           "Did not find lambda component for '%s' in %s",
2533                           buf, fn);
2534             }
2535         }
2536         else
2537         {
2538             if (lam->lc->N > 1)
2539             {
2540                 gmx_fatal(FARGS,
2541                           "dhdl without component name with >1 lambda component in %s",
2542                           fn);
2543             }
2544             dhdl_index = 0;
2545         }
2546         lam->dhdl = dhdl_index;
2547     }
2548     return TRUE;
2549 }
2550
2551 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2552                                 lambda_components_t *lc)
2553 {
2554     gmx_bool    bFound;
2555     const char *ptr;
2556     char       *end;
2557     double      native_lambda;
2558
2559     bFound = FALSE;
2560
2561     /* first check for a state string */
2562     ptr = std::strstr(subtitle, "state");
2563     if (ptr)
2564     {
2565         int         index = -1;
2566         const char *val_end;
2567
2568         /* the new 4.6 style lambda vectors */
2569         ptr = find_value(ptr);
2570         if (ptr)
2571         {
2572             index = std::strtol(ptr, &end, 10);
2573             if (ptr == end)
2574             {
2575                 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2576                 return FALSE;
2577             }
2578             ptr = end;
2579         }
2580         else
2581         {
2582             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2583             return FALSE;
2584         }
2585         /* now find the lambda vector component names */
2586         while (*ptr != '(' && !std::isalnum(*ptr))
2587         {
2588             ptr++;
2589             if (*ptr == '\0')
2590             {
2591                 gmx_fatal(FARGS,
2592                           "Incomplete lambda vector component data in %s", fn);
2593                 return FALSE;
2594             }
2595         }
2596         val_end = ptr;
2597         if (!read_lambda_components(ptr, lc, &val_end, fn))
2598         {
2599             gmx_fatal(FARGS,
2600                       "lambda vector components in %s don't match those previously read",
2601                       fn);
2602         }
2603         ptr = find_value(val_end);
2604         if (!ptr)
2605         {
2606             gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2607             return FALSE;
2608         }
2609         lambda_vec_init(&(ba->native_lambda), lc);
2610         if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2611         {
2612             gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2613         }
2614         ba->native_lambda.index = index;
2615         bFound                  = TRUE;
2616     }
2617     else
2618     {
2619         /* compatibility mode: check for lambda in other ways. */
2620         /* plain text lambda string */
2621         ptr = std::strstr(subtitle, "lambda");
2622         if (ptr == nullptr)
2623         {
2624             /* xmgrace formatted lambda string */
2625             ptr = std::strstr(subtitle, "\\xl\\f{}");
2626         }
2627         if (ptr == nullptr)
2628         {
2629             /* xmgr formatted lambda string */
2630             ptr = std::strstr(subtitle, "\\8l\\4");
2631         }
2632         if (ptr != nullptr)
2633         {
2634             ptr = std::strstr(ptr, "=");
2635         }
2636         if (ptr != nullptr)
2637         {
2638             bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2639             /* add the lambda component name as an empty string */
2640             if (lc->N > 0)
2641             {
2642                 if (!lambda_components_check(lc, 0, "", 0))
2643                 {
2644                     gmx_fatal(FARGS,
2645                               "lambda vector components in %s don't match those previously read",
2646                               fn);
2647                 }
2648             }
2649             else
2650             {
2651                 lambda_components_add(lc, "", 0);
2652             }
2653             lambda_vec_init(&(ba->native_lambda), lc);
2654             ba->native_lambda.val[0] = native_lambda;
2655         }
2656     }
2657
2658     return bFound;
2659 }
2660
2661 static double filename2lambda(const char *fn)
2662 {
2663     double        lambda;
2664     const char   *ptr, *digitptr;
2665     char         *endptr;
2666     int           dirsep;
2667     ptr = fn;
2668     /* go to the end of the path string and search backward to find the last
2669        directory in the path which has to contain the value of lambda
2670      */
2671     while (ptr[1] != '\0')
2672     {
2673         ptr++;
2674     }
2675     /* searching backward to find the second directory separator */
2676     dirsep   = 0;
2677     digitptr = nullptr;
2678     while (ptr >= fn)
2679     {
2680         if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2681         {
2682             if (dirsep == 1)
2683             {
2684                 break;
2685             }
2686             dirsep++;
2687         }
2688         /* save the last position of a digit between the last two
2689            separators = in the last dirname */
2690         if (dirsep > 0 && std::isdigit(*ptr))
2691         {
2692             digitptr = ptr;
2693         }
2694         ptr--;
2695     }
2696     if (!digitptr)
2697     {
2698         gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2699                   " last directory in the path '%s' does not contain a number", fn);
2700     }
2701     if (digitptr[-1] == '-')
2702     {
2703         digitptr--;
2704     }
2705     lambda = strtod(digitptr, &endptr);
2706     if (endptr == digitptr)
2707     {
2708         gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2709     }
2710     return lambda;
2711 }
2712
2713 static void read_bar_xvg_lowlevel(const char *fn, const real *temp, xvg_t *ba,
2714                                   lambda_components_t *lc)
2715 {
2716     int          i;
2717     char        *subtitle, **legend, *ptr;
2718     int          np;
2719     gmx_bool     native_lambda_read = FALSE;
2720     char         buf[STRLEN];
2721
2722     xvg_init(ba);
2723
2724     ba->filename = fn;
2725
2726     np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2727     if (!ba->y)
2728     {
2729         gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2730     }
2731     /* Reorder the data */
2732     ba->t  = ba->y[0];
2733     for (i = 1; i < ba->nset; i++)
2734     {
2735         ba->y[i-1] = ba->y[i];
2736     }
2737     ba->nset--;
2738
2739     snew(ba->np, ba->nset);
2740     for (i = 0; i < ba->nset; i++)
2741     {
2742         ba->np[i] = np;
2743     }
2744
2745     ba->temp = -1;
2746     if (subtitle != nullptr)
2747     {
2748         /* try to extract temperature */
2749         ptr = std::strstr(subtitle, "T =");
2750         if (ptr != nullptr)
2751         {
2752             ptr += 3;
2753             if (sscanf(ptr, "%lf", &ba->temp) == 1)
2754             {
2755                 if (ba->temp <= 0)
2756                 {
2757                     gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2758                               ba->temp, fn);
2759                 }
2760             }
2761         }
2762     }
2763     if (ba->temp < 0)
2764     {
2765         if (*temp <= 0)
2766         {
2767             gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2768         }
2769         ba->temp = *temp;
2770     }
2771
2772     /* Try to deduce lambda from the subtitle */
2773     if (subtitle)
2774     {
2775         if (subtitle2lambda(subtitle, ba, fn, lc))
2776         {
2777             native_lambda_read = TRUE;
2778         }
2779     }
2780     snew(ba->lambda, ba->nset);
2781     if (legend == nullptr)
2782     {
2783         /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2784         if (ba->nset == 1)
2785         {
2786             if (!native_lambda_read)
2787             {
2788                 // Deduce lambda from the file name.
2789                 // Updated the routine filename2lambda to actually return lambda
2790                 // to avoid C++ warnings, but this usage does not seem to need it?
2791                 // EL 2015-07-10
2792                 filename2lambda(fn);
2793                 native_lambda_read = TRUE;
2794             }
2795             ba->lambda[0] = ba->native_lambda;
2796         }
2797         else
2798         {
2799             gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2800         }
2801     }
2802     else
2803     {
2804         for (i = 0; i < ba->nset; )
2805         {
2806             /* Read lambda from the legend */
2807             lambda_vec_init( &(ba->lambda[i]), lc );
2808             lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2809             gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2810             if (use)
2811             {
2812                 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2813                 i++;
2814             }
2815             else
2816             {
2817                 int j;
2818                 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2819                 for (j = i+1; j < ba->nset; j++)
2820                 {
2821                     ba->y[j-1]  = ba->y[j];
2822                     legend[j-1] = legend[j];
2823                 }
2824                 ba->nset--;
2825             }
2826         }
2827     }
2828
2829     if (!native_lambda_read)
2830     {
2831         gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2832     }
2833
2834     if (legend != nullptr)
2835     {
2836         for (i = 0; i < ba->nset-1; i++)
2837         {
2838             sfree(legend[i]);
2839         }
2840         sfree(legend);
2841     }
2842 }
2843
2844 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2845 {
2846     xvg_t     *barsim;
2847     samples_t *s;
2848     int        i;
2849
2850     snew(barsim, 1);
2851
2852     read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2853
2854     if (barsim->nset < 1)
2855     {
2856         gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2857     }
2858
2859     if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2860     {
2861         gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2862     }
2863     *temp = barsim->temp;
2864
2865     /* now create a series of samples_t */
2866     snew(s, barsim->nset);
2867     for (i = 0; i < barsim->nset; i++)
2868     {
2869         samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2870                      barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2871                                                    &(barsim->lambda[i])),
2872                      fn);
2873         s[i].du  = barsim->y[i];
2874         s[i].ndu = barsim->np[i];
2875         s[i].t   = barsim->t;
2876
2877         lambda_data_list_insert_sample(sd->lb, s+i);
2878     }
2879     {
2880         char buf[STRLEN];
2881
2882         lambda_vec_print(s[0].native_lambda, buf, FALSE);
2883         printf("%s: %.1f - %.1f; lambda = %s\n    dH/dl & foreign lambdas:\n",
2884                fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2885         for (i = 0; i < barsim->nset; i++)
2886         {
2887             lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2888             printf("        %s (%d pts)\n", buf, s[i].ndu);
2889         }
2890     }
2891     printf("\n\n");
2892 }
2893
2894 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2895                                  double start_time, double delta_time,
2896                                  lambda_vec_t *native_lambda, double temp,
2897                                  double *last_t, const char *filename)
2898 {
2899     int           i, j;
2900     lambda_vec_t *foreign_lambda;
2901     int           type;
2902     samples_t    *s; /* convenience pointer */
2903     int           startj;
2904
2905     /* check the block types etc. */
2906     if ( (blk->nsub < 3) ||
2907          (blk->sub[0].type != xdr_datatype_int) ||
2908          (blk->sub[1].type != xdr_datatype_double) ||
2909          (
2910              (blk->sub[2].type != xdr_datatype_float) &&
2911              (blk->sub[2].type != xdr_datatype_double)
2912          ) ||
2913          (blk->sub[0].nr < 1) ||
2914          (blk->sub[1].nr < 1) )
2915     {
2916         gmx_fatal(FARGS,
2917                   "Unexpected/corrupted block data in file %s around time %f.",
2918                   filename, start_time);
2919     }
2920
2921     snew(foreign_lambda, 1);
2922     lambda_vec_init(foreign_lambda, native_lambda->lc);
2923     lambda_vec_copy(foreign_lambda, native_lambda);
2924     type = blk->sub[0].ival[0];
2925     if (type == dhbtDH)
2926     {
2927         for (i = 0; i < native_lambda->lc->N; i++)
2928         {
2929             foreign_lambda->val[i] = blk->sub[1].dval[i];
2930         }
2931     }
2932     else
2933     {
2934         if (blk->sub[0].nr > 1)
2935         {
2936             foreign_lambda->dhdl = blk->sub[0].ival[1];
2937         }
2938         else
2939         {
2940             foreign_lambda->dhdl = 0;
2941         }
2942     }
2943
2944     if (!*smp)
2945     {
2946         /* initialize the samples structure if it's empty. */
2947         snew(*smp, 1);
2948         samples_init(*smp, native_lambda, foreign_lambda, temp,
2949                      type == dhbtDHDL, filename);
2950         (*smp)->start_time = start_time;
2951         (*smp)->delta_time = delta_time;
2952     }
2953
2954     /* set convenience pointer */
2955     s = *smp;
2956
2957     /* now double check */
2958     if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2959     {
2960         char buf[STRLEN], buf2[STRLEN];
2961         lambda_vec_print(foreign_lambda, buf, FALSE);
2962         lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2963         fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2964         gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2965                   filename, start_time);
2966     }
2967
2968     /* make room for the data */
2969     if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2970     {
2971         s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2972             blk->sub[2].nr*2 : s->ndu_alloc;
2973         srenew(s->du_alloc, s->ndu_alloc);
2974         s->du = s->du_alloc;
2975     }
2976     startj   = s->ndu;
2977     s->ndu  += blk->sub[2].nr;
2978     s->ntot += blk->sub[2].nr;
2979     *ndu     = blk->sub[2].nr;
2980
2981     /* and copy the data*/
2982     for (j = 0; j < blk->sub[2].nr; j++)
2983     {
2984         if (blk->sub[2].type == xdr_datatype_float)
2985         {
2986             s->du[startj+j] = blk->sub[2].fval[j];
2987         }
2988         else
2989         {
2990             s->du[startj+j] = blk->sub[2].dval[j];
2991         }
2992     }
2993     if (start_time + blk->sub[2].nr*delta_time > *last_t)
2994     {
2995         *last_t = start_time + blk->sub[2].nr*delta_time;
2996     }
2997 }
2998
2999 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3000                                       double start_time, double delta_time,
3001                                       lambda_vec_t *native_lambda, double temp,
3002                                       double *last_t, const char *filename)
3003 {
3004     int           i, j;
3005     samples_t    *s;
3006     int           nhist;
3007     lambda_vec_t *foreign_lambda;
3008     int           type;
3009     int           nbins[2];
3010
3011     /* check the block types etc. */
3012     if ( (blk->nsub < 2) ||
3013          (blk->sub[0].type != xdr_datatype_double) ||
3014          (blk->sub[1].type != xdr_datatype_int64) ||
3015          (blk->sub[0].nr < 2)  ||
3016          (blk->sub[1].nr < 2) )
3017     {
3018         gmx_fatal(FARGS,
3019                   "Unexpected/corrupted block data in file %s around time %f",
3020                   filename, start_time);
3021     }
3022
3023     nhist = blk->nsub-2;
3024     if (nhist == 0)
3025     {
3026         return nullptr;
3027     }
3028     if (nhist > 2)
3029     {
3030         gmx_fatal(FARGS,
3031                   "Unexpected/corrupted block data in file %s around time %f",
3032                   filename, start_time);
3033     }
3034
3035     snew(s, 1);
3036     *nsamples = 1;
3037
3038     snew(foreign_lambda, 1);
3039     lambda_vec_init(foreign_lambda, native_lambda->lc);
3040     lambda_vec_copy(foreign_lambda, native_lambda);
3041     type = static_cast<int>(blk->sub[1].lval[1]);
3042     if (type == dhbtDH)
3043     {
3044         double old_foreign_lambda;
3045
3046         old_foreign_lambda = blk->sub[0].dval[0];
3047         if (old_foreign_lambda >= 0)
3048         {
3049             foreign_lambda->val[0] = old_foreign_lambda;
3050             if (foreign_lambda->lc->N > 1)
3051             {
3052                 gmx_fatal(FARGS,
3053                           "Single-component lambda in multi-component file %s",
3054                           filename);
3055             }
3056         }
3057         else
3058         {
3059             for (i = 0; i < native_lambda->lc->N; i++)
3060             {
3061                 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3062             }
3063         }
3064     }
3065     else
3066     {
3067         if (foreign_lambda->lc->N > 1)
3068         {
3069             if (blk->sub[1].nr < 3 + nhist)
3070             {
3071                 gmx_fatal(FARGS,
3072                           "Missing derivative coord in multi-component file %s",
3073                           filename);
3074             }
3075             foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3076         }
3077         else
3078         {
3079             foreign_lambda->dhdl = 0;
3080         }
3081     }
3082
3083     samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3084                  filename);
3085     snew(s->hist, 1);
3086
3087     for (i = 0; i < nhist; i++)
3088     {
3089         nbins[i] = blk->sub[i+2].nr;
3090     }
3091
3092     hist_init(s->hist, nhist, nbins);
3093
3094     for (i = 0; i < nhist; i++)
3095     {
3096         s->hist->x0[i] = blk->sub[1].lval[2+i];
3097         s->hist->dx[i] = blk->sub[0].dval[1];
3098         if (i == 1)
3099         {
3100             s->hist->dx[i] = -s->hist->dx[i];
3101         }
3102     }
3103
3104     s->hist->start_time = start_time;
3105     s->hist->delta_time = delta_time;
3106     s->start_time       = start_time;
3107     s->delta_time       = delta_time;
3108
3109     for (i = 0; i < nhist; i++)
3110     {
3111         gmx_int64_t     sum = 0;
3112
3113         for (j = 0; j < s->hist->nbin[i]; j++)
3114         {
3115             int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3116
3117             s->hist->bin[i][j] = binv;
3118             sum               += binv;
3119
3120         }
3121         if (i == 0)
3122         {
3123             s->ntot      = sum;
3124             s->hist->sum = sum;
3125         }
3126         else
3127         {
3128             if (s->ntot != sum)
3129             {
3130                 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3131                           filename);
3132             }
3133         }
3134     }
3135
3136     if (start_time + s->hist->sum*delta_time > *last_t)
3137     {
3138         *last_t = start_time + s->hist->sum*delta_time;
3139     }
3140     return s;
3141 }
3142
3143
3144 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3145 {
3146     int            i, j;
3147     ener_file_t    fp;
3148     t_enxframe    *fr;
3149     int            nre;
3150     gmx_enxnm_t   *enm           = nullptr;
3151     double         first_t       = -1;
3152     double         last_t        = -1;
3153     samples_t    **samples_rawdh = nullptr; /* contains samples for raw delta_h  */
3154     int           *nhists        = nullptr; /* array to keep count & print at end */
3155     int           *npts          = nullptr; /* array to keep count & print at end */
3156     lambda_vec_t **lambdas       = nullptr; /* array to keep count & print at end */
3157     lambda_vec_t  *native_lambda;
3158     int            nsamples = 0;
3159     lambda_vec_t   start_lambda;
3160
3161     fp = open_enx(fn, "r");
3162     do_enxnms(fp, &nre, &enm);
3163     snew(fr, 1);
3164
3165     snew(native_lambda, 1);
3166     start_lambda.lc  = nullptr;
3167     start_lambda.val = nullptr;
3168
3169     while (do_enx(fp, fr))
3170     {
3171         /* count the data blocks */
3172         int    nblocks_raw  = 0;
3173         int    nblocks_hist = 0;
3174         int    nlam         = 0;
3175         int    k;
3176         /* DHCOLL block information: */
3177         double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3178         double rtemp      = 0;
3179
3180         /* count the blocks and handle collection information: */
3181         for (i = 0; i < fr->nblock; i++)
3182         {
3183             if (fr->block[i].id == enxDHHIST)
3184             {
3185                 nblocks_hist++;
3186             }
3187             if (fr->block[i].id == enxDH)
3188             {
3189                 nblocks_raw++;
3190             }
3191             if (fr->block[i].id == enxDHCOLL)
3192             {
3193                 nlam++;
3194                 if ( (fr->block[i].nsub < 1) ||
3195                      (fr->block[i].sub[0].type != xdr_datatype_double) ||
3196                      (fr->block[i].sub[0].nr < 5))
3197                 {
3198                     gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3199                 }
3200
3201                 /* read the data from the DHCOLL block */
3202                 rtemp            = fr->block[i].sub[0].dval[0];
3203                 start_time       = fr->block[i].sub[0].dval[1];
3204                 delta_time       = fr->block[i].sub[0].dval[2];
3205                 old_start_lambda = fr->block[i].sub[0].dval[3];
3206                 delta_lambda     = fr->block[i].sub[0].dval[4];
3207
3208                 if (delta_lambda > 0)
3209                 {
3210                     gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3211                 }
3212                 if ( ( *temp != rtemp) && (*temp > 0) )
3213                 {
3214                     gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3215                 }
3216                 *temp = rtemp;
3217
3218                 if (old_start_lambda >= 0)
3219                 {
3220                     if (sd->lc.N > 0)
3221                     {
3222                         if (!lambda_components_check(&(sd->lc), 0, "", 0))
3223                         {
3224                             gmx_fatal(FARGS,
3225                                       "lambda vector components in %s don't match those previously read",
3226                                       fn);
3227                         }
3228                     }
3229                     else
3230                     {
3231                         lambda_components_add(&(sd->lc), "", 0);
3232                     }
3233                     if (!start_lambda.lc)
3234                     {
3235                         lambda_vec_init(&start_lambda, &(sd->lc));
3236                     }
3237                     start_lambda.val[0] = old_start_lambda;
3238                 }
3239                 else
3240                 {
3241                     /* read lambda vector */
3242                     int      n_lambda_vec;
3243                     gmx_bool check = (sd->lc.N > 0);
3244                     if (fr->block[i].nsub < 2)
3245                     {
3246                         gmx_fatal(FARGS,
3247                                   "No lambda vector, but start_lambda=%f\n",
3248                                   old_start_lambda);
3249                     }
3250                     n_lambda_vec = fr->block[i].sub[1].ival[1];
3251                     for (j = 0; j < n_lambda_vec; j++)
3252                     {
3253                         const char *name =
3254                             efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3255                         if (check)
3256                         {
3257                             /* check the components */
3258                             lambda_components_check(&(sd->lc), j, name,
3259                                                     std::strlen(name));
3260                         }
3261                         else
3262                         {
3263                             lambda_components_add(&(sd->lc), name,
3264                                                   std::strlen(name));
3265                         }
3266                     }
3267                     lambda_vec_init(&start_lambda, &(sd->lc));
3268                     start_lambda.index = fr->block[i].sub[1].ival[0];
3269                     for (j = 0; j < n_lambda_vec; j++)
3270                     {
3271                         start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3272                     }
3273                 }
3274                 if (first_t < 0)
3275                 {
3276                     first_t = start_time;
3277                 }
3278             }
3279         }
3280
3281         if (nlam != 1)
3282         {
3283             gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3284         }
3285         if (nblocks_raw > 0 && nblocks_hist > 0)
3286         {
3287             gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3288         }
3289
3290         if (nsamples == 0)
3291         {
3292             /* this is the first round; allocate the associated data
3293                structures */
3294             /*native_lambda=start_lambda;*/
3295             lambda_vec_init(native_lambda, &(sd->lc));
3296             lambda_vec_copy(native_lambda, &start_lambda);
3297             nsamples = nblocks_raw+nblocks_hist;
3298             snew(nhists, nsamples);
3299             snew(npts, nsamples);
3300             snew(lambdas, nsamples);
3301             snew(samples_rawdh, nsamples);
3302             for (i = 0; i < nsamples; i++)
3303             {
3304                 nhists[i]        = 0;
3305                 npts[i]          = 0;
3306                 lambdas[i]       = nullptr;
3307                 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3308                                                ones contain values */
3309             }
3310         }
3311         else
3312         {
3313             // nsamples > 0 means this is NOT the first iteration
3314
3315             /* check the native lambda */
3316             if (!lambda_vec_same(&start_lambda, native_lambda) )
3317             {
3318                 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3319                           fn, native_lambda->val, start_lambda.val, start_time);
3320             }
3321             /* check the number of samples against the previous number */
3322             if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3323             {
3324                 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3325                           fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3326             }
3327             /* check whether last iterations's end time matches with
3328                the currrent start time */
3329             if ( (std::abs(last_t - start_time) > 2*delta_time)  && last_t >= 0)
3330             {
3331                 /* it didn't. We need to store our samples and reallocate */
3332
3333                 for (i = 0; i < nsamples; i++)
3334                 {
3335                     // nsamples is always >0 here, so samples_rawdh must be a valid pointer. Unfortunately
3336                     // cppcheck does not understand the logic unless the assert is inside the loop, but
3337                     // this is not performance-sensitive code.
3338                     GMX_RELEASE_ASSERT(samples_rawdh != nullptr, "samples_rawdh==NULL with nsamples>0");
3339                     if (samples_rawdh[i])
3340                     {
3341                         /* insert it into the existing list */
3342                         lambda_data_list_insert_sample(sd->lb,
3343                                                        samples_rawdh[i]);
3344                         /* and make sure we'll allocate a new one this time
3345                            around */
3346                         samples_rawdh[i] = nullptr;
3347                     }
3348                 }
3349             }
3350         }
3351
3352         /* and read them */
3353         k = 0; /* counter for the lambdas, etc. arrays */
3354         for (i = 0; i < fr->nblock; i++)
3355         {
3356             if (fr->block[i].id == enxDH)
3357             {
3358                 int type = (fr->block[i].sub[0].ival[0]);
3359                 if (type == dhbtDH || type == dhbtDHDL)
3360                 {
3361                     int ndu;
3362                     read_edr_rawdh_block(&(samples_rawdh[k]),
3363                                          &ndu,
3364                                          &(fr->block[i]),
3365                                          start_time, delta_time,
3366                                          native_lambda, rtemp,
3367                                          &last_t, fn);
3368                     npts[k] += ndu;
3369                     if (samples_rawdh[k])
3370                     {
3371                         lambdas[k] = samples_rawdh[k]->foreign_lambda;
3372                     }
3373                     k++;
3374                 }
3375             }
3376             else if (fr->block[i].id == enxDHHIST)
3377             {
3378                 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3379                 if (type == dhbtDH || type == dhbtDHDL)
3380                 {
3381                     int        j;
3382                     int        nb = 0;
3383                     samples_t *s; /* this is where the data will go */
3384                     s = read_edr_hist_block(&nb, &(fr->block[i]),
3385                                             start_time, delta_time,
3386                                             native_lambda, rtemp,
3387                                             &last_t, fn);
3388                     nhists[k] += nb;
3389                     if (nb > 0)
3390                     {
3391                         lambdas[k] = s->foreign_lambda;
3392                     }
3393                     k++;
3394                     /* and insert the new sample immediately */
3395                     for (j = 0; j < nb; j++)
3396                     {
3397                         lambda_data_list_insert_sample(sd->lb, s+j);
3398                     }
3399                 }
3400             }
3401         }
3402     }
3403     /* Now store all our extant sample collections */
3404     for (i = 0; i < nsamples; i++)
3405     {
3406         if (samples_rawdh[i])
3407         {
3408             /* insert it into the existing list */
3409             lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3410         }
3411     }
3412
3413
3414     {
3415         char buf[STRLEN];
3416         printf("\n");
3417         lambda_vec_print(native_lambda, buf, FALSE);
3418         printf("%s: %.1f - %.1f; lambda = %s\n    foreign lambdas:\n",
3419                fn, first_t, last_t, buf);
3420         for (i = 0; i < nsamples; i++)
3421         {
3422             if (lambdas[i])
3423             {
3424                 lambda_vec_print(lambdas[i], buf, TRUE);
3425                 if (nhists[i] > 0)
3426                 {
3427                     printf("        %s (%d hists)\n", buf, nhists[i]);
3428                 }
3429                 else
3430                 {
3431                     printf("        %s (%d pts)\n", buf, npts[i]);
3432                 }
3433             }
3434         }
3435     }
3436     printf("\n\n");
3437     sfree(npts);
3438     sfree(nhists);
3439     sfree(lambdas);
3440 }
3441
3442
3443 int gmx_bar(int argc, char *argv[])
3444 {
3445     static const char *desc[] = {
3446         "[THISMODULE] calculates free energy difference estimates through ",
3447         "Bennett's acceptance ratio method (BAR). It also automatically",
3448         "adds series of individual free energies obtained with BAR into",
3449         "a combined free energy estimate.[PAR]",
3450
3451         "Every individual BAR free energy difference relies on two ",
3452         "simulations at different states: say state A and state B, as",
3453         "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3454         "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3455         "average of the Hamiltonian difference of state B given state A and",
3456         "vice versa.",
3457         "The energy differences to the other state must be calculated",
3458         "explicitly during the simulation. This can be done with",
3459         "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3460
3461         "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3462         "Two types of input files are supported:",
3463         "",
3464         " * Files with more than one [IT]y[it]-value. ",
3465         "   The files should have columns ",
3466         "   with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3467         "   The [GRK]lambda[grk] values are inferred ",
3468         "   from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3469         "   dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3470         "   legends of Delta H",
3471         " * Files with only one [IT]y[it]-value. Using the",
3472         "   [TT]-extp[tt] option for these files, it is assumed",
3473         "   that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3474         "   Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3475         "   The [GRK]lambda[grk] value of the simulation is inferred from the ",
3476         "   subtitle (if present), otherwise from a number in the subdirectory ",
3477         "   in the file name.",
3478         "",
3479
3480         "The [GRK]lambda[grk] of the simulation is parsed from ",
3481         "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3482         "foreign [GRK]lambda[grk] values from the legend containing the ",
3483         "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3484         "the legend line containing 'T ='.[PAR]",
3485
3486         "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3487         "These can contain either lists of energy differences (see the ",
3488         "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3489         "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3490         "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3491         "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3492
3493         "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3494         "the energy difference can also be extrapolated from the ",
3495         "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3496         "option, which assumes that the system's Hamiltonian depends linearly",
3497         "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3498
3499         "The free energy estimates are determined using BAR with bisection, ",
3500         "with the precision of the output set with [TT]-prec[tt]. ",
3501         "An error estimate taking into account time correlations ",
3502         "is made by splitting the data into blocks and determining ",
3503         "the free energy differences over those blocks and assuming ",
3504         "the blocks are independent. ",
3505         "The final error estimate is determined from the average variance ",
3506         "over 5 blocks. A range of block numbers for error estimation can ",
3507         "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3508
3509         "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3510         "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3511         "samples. [BB]Note[bb] that when aggregating energy ",
3512         "differences/derivatives with different sampling intervals, this is ",
3513         "almost certainly not correct. Usually subsequent energies are ",
3514         "correlated and different time intervals mean different degrees ",
3515         "of correlation between samples.[PAR]",
3516
3517         "The results are split in two parts: the last part contains the final ",
3518         "results in kJ/mol, together with the error estimate for each part ",
3519         "and the total. The first part contains detailed free energy ",
3520         "difference estimates and phase space overlap measures in units of ",
3521         "kT (together with their computed error estimate). The printed ",
3522         "values are:",
3523         "",
3524         " * lam_A: the [GRK]lambda[grk] values for point A.",
3525         " * lam_B: the [GRK]lambda[grk] values for point B.",
3526         " *    DG: the free energy estimate.",
3527         " *   s_A: an estimate of the relative entropy of B in A.",
3528         " *   s_B: an estimate of the relative entropy of A in B.",
3529         " * stdev: an estimate expected per-sample standard deviation.",
3530         "",
3531
3532         "The relative entropy of both states in each other's ensemble can be ",
3533         "interpreted as a measure of phase space overlap: ",
3534         "the relative entropy s_A of the work samples of lambda_B in the ",
3535         "ensemble of lambda_A (and vice versa for s_B), is a ",
3536         "measure of the 'distance' between Boltzmann distributions of ",
3537         "the two states, that goes to zero for identical distributions. See ",
3538         "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3539         "[PAR]",
3540         "The estimate of the expected per-sample standard deviation, as given ",
3541         "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3542         "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3543         "of the actual statistical error, because it assumes independent samples).[PAR]",
3544
3545         "To get a visual estimate of the phase space overlap, use the ",
3546         "[TT]-oh[tt] option to write series of histograms, together with the ",
3547         "[TT]-nbin[tt] option.[PAR]"
3548     };
3549     static real        begin    = 0, end = -1, temp = -1;
3550     int                nd       = 2, nbmin = 5, nbmax = 5;
3551     int                nbin     = 100;
3552     gmx_bool           use_dhdl = FALSE;
3553     t_pargs            pa[]     = {
3554         { "-b",    FALSE, etREAL, {&begin},  "Begin time for BAR" },
3555         { "-e",    FALSE, etREAL, {&end},    "End time for BAR" },
3556         { "-temp", FALSE, etREAL, {&temp},   "Temperature (K)" },
3557         { "-prec", FALSE, etINT,  {&nd},     "The number of digits after the decimal point" },
3558         { "-nbmin",  FALSE, etINT,  {&nbmin}, "Minimum number of blocks for error estimation" },
3559         { "-nbmax",  FALSE, etINT,  {&nbmax}, "Maximum number of blocks for error estimation" },
3560         { "-nbin",  FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3561         { "-extp",  FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3562     };
3563
3564     t_filenm           fnm[] = {
3565         { efXVG, "-f",  "dhdl",   ffOPTRDMULT },
3566         { efEDR, "-g",  "ener",   ffOPTRDMULT },
3567         { efXVG, "-o",  "bar",    ffOPTWR },
3568         { efXVG, "-oi", "barint", ffOPTWR },
3569         { efXVG, "-oh", "histogram", ffOPTWR }
3570     };
3571 #define NFILE asize(fnm)
3572
3573     int               f;
3574     int               nf = 0;    /* file counter */
3575     int               nfile_tot; /* total number of input files */
3576     sim_data_t        sim_data;  /* the simulation data */
3577     barres_t         *results;   /* the results */
3578     int               nresults;  /* number of results in results array */
3579
3580     double           *partsum;
3581     double            prec, dg_tot;
3582     FILE             *fpb, *fpi;
3583     char              dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3584     char              buf[STRLEN], buf2[STRLEN];
3585     char              ktformat[STRLEN], sktformat[STRLEN];
3586     char              kteformat[STRLEN], skteformat[STRLEN];
3587     gmx_output_env_t *oenv;
3588     double            kT;
3589     gmx_bool          result_OK = TRUE, bEE = TRUE;
3590
3591     gmx_bool          disc_err          = FALSE;
3592     double            sum_disc_err      = 0.; /* discretization error */
3593     gmx_bool          histrange_err     = FALSE;
3594     double            sum_histrange_err = 0.; /* histogram range error */
3595     double            stat_err          = 0.; /* statistical error */
3596
3597     if (!parse_common_args(&argc, argv,
3598                            PCA_CAN_VIEW,
3599                            NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3600     {
3601         return 0;
3602     }
3603
3604     gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3605     gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3606
3607     sim_data_init(&sim_data);
3608 #if 0
3609     /* make linked list */
3610     lb = &lambda_head;
3611     lambda_data_init(lb, 0, 0);
3612     lb->next = lb;
3613     lb->prev = lb;
3614 #endif
3615
3616
3617     nfile_tot = xvgFiles.size() + edrFiles.size();
3618
3619     if (nfile_tot == 0)
3620     {
3621         gmx_fatal(FARGS, "No input files!");
3622     }
3623
3624     if (nd < 0)
3625     {
3626         gmx_fatal(FARGS, "Can not have negative number of digits");
3627     }
3628     prec = std::pow(10.0, static_cast<double>(-nd));
3629
3630     snew(partsum, (nbmax+1)*(nbmax+1));
3631     nf = 0;
3632
3633     /* read in all files. First xvg files */
3634     for (const std::string &filenm : xvgFiles)
3635     {
3636         read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3637         nf++;
3638     }
3639     /* then .edr files */
3640     for (const std::string &filenm : edrFiles)
3641     {
3642         read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3643         nf++;
3644     }
3645
3646     /* fix the times to allow for equilibration */
3647     sim_data_impose_times(&sim_data, begin, end);
3648
3649     if (opt2bSet("-oh", NFILE, fnm))
3650     {
3651         sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3652     }
3653
3654     /* assemble the output structures from the lambdas */
3655     results = barres_list_create(&sim_data, &nresults, use_dhdl);
3656
3657     sum_disc_err = barres_list_max_disc_err(results, nresults);
3658
3659     if (nresults == 0)
3660     {
3661         printf("\nNo results to calculate.\n");
3662         return 0;
3663     }
3664
3665     if (sum_disc_err > prec)
3666     {
3667         prec = sum_disc_err;
3668         nd   = static_cast<int>(std::ceil(-std::log10(prec)));
3669         printf("WARNING: setting the precision to %g because that is the minimum\n         reasonable number, given the expected discretization error.\n", prec);
3670     }
3671
3672
3673     /*sprintf(lamformat,"%%6.3f");*/
3674     sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3675     /* the format strings of the results in kT */
3676     sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3677     sprintf( sktformat, "%%%ds", 6+nd);
3678     /* the format strings of the errors in kT */
3679     sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3680     sprintf( skteformat, "%%%ds", 4+nd);
3681     sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3682     sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3683
3684
3685
3686     fpb = nullptr;
3687     if (opt2bSet("-o", NFILE, fnm))
3688     {
3689         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3690         fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3691                             "\\lambda", buf, exvggtXYDY, oenv);
3692     }
3693
3694     fpi = nullptr;
3695     if (opt2bSet("-oi", NFILE, fnm))
3696     {
3697         sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3698         fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3699                        "\\lambda", buf, oenv);
3700     }
3701
3702
3703
3704     if (nbmin > nbmax)
3705     {
3706         nbmin = nbmax;
3707     }
3708
3709     /* first calculate results */
3710     bEE      = TRUE;
3711     disc_err = FALSE;
3712     for (f = 0; f < nresults; f++)
3713     {
3714         /* Determine the free energy difference with a factor of 10
3715          * more accuracy than requested for printing.
3716          */
3717         calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3718                  &bEE, partsum);
3719
3720         if (results[f].dg_disc_err > prec/10.)
3721         {
3722             disc_err = TRUE;
3723         }
3724         if (results[f].dg_histrange_err > prec/10.)
3725         {
3726             histrange_err = TRUE;
3727         }
3728     }
3729
3730     /* print results in kT */
3731     kT   = BOLTZ*temp;
3732
3733     printf("\nTemperature: %g K\n", temp);
3734
3735     printf("\nDetailed results in kT (see help for explanation):\n\n");
3736     printf("%6s ", " lam_A");
3737     printf("%6s ", " lam_B");
3738     printf(sktformat,  "DG ");
3739     if (bEE)
3740     {
3741         printf(skteformat, "+/- ");
3742     }
3743     if (disc_err)
3744     {
3745         printf(skteformat, "disc ");
3746     }
3747     if (histrange_err)
3748     {
3749         printf(skteformat, "range ");
3750     }
3751     printf(sktformat,  "s_A ");
3752     if (bEE)
3753     {
3754         printf(skteformat, "+/- " );
3755     }
3756     printf(sktformat,  "s_B ");
3757     if (bEE)
3758     {
3759         printf(skteformat, "+/- " );
3760     }
3761     printf(sktformat,  "stdev ");
3762     if (bEE)
3763     {
3764         printf(skteformat, "+/- ");
3765     }
3766     printf("\n");
3767     for (f = 0; f < nresults; f++)
3768     {
3769         lambda_vec_print_short(results[f].a->native_lambda, buf);
3770         printf("%s ", buf);
3771         lambda_vec_print_short(results[f].b->native_lambda, buf);
3772         printf("%s ", buf);
3773         printf(ktformat,  results[f].dg);
3774         printf(" ");
3775         if (bEE)
3776         {
3777             printf(kteformat, results[f].dg_err);
3778             printf(" ");
3779         }
3780         if (disc_err)
3781         {
3782             printf(kteformat, results[f].dg_disc_err);
3783             printf(" ");
3784         }
3785         if (histrange_err)
3786         {
3787             printf(kteformat, results[f].dg_histrange_err);
3788             printf(" ");
3789         }
3790         printf(ktformat,  results[f].sa);
3791         printf(" ");
3792         if (bEE)
3793         {
3794             printf(kteformat, results[f].sa_err);
3795             printf(" ");
3796         }
3797         printf(ktformat,  results[f].sb);
3798         printf(" ");
3799         if (bEE)
3800         {
3801             printf(kteformat, results[f].sb_err);
3802             printf(" ");
3803         }
3804         printf(ktformat,  results[f].dg_stddev);
3805         printf(" ");
3806         if (bEE)
3807         {
3808             printf(kteformat, results[f].dg_stddev_err);
3809         }
3810         printf("\n");
3811
3812         /* Check for negative relative entropy with a 95% certainty. */
3813         if (results[f].sa < -2*results[f].sa_err ||
3814             results[f].sb < -2*results[f].sb_err)
3815         {
3816             result_OK = FALSE;
3817         }
3818     }
3819
3820     if (!result_OK)
3821     {
3822         printf("\nWARNING: Some of these results violate the Second Law of "
3823                "Thermodynamics: \n"
3824                "         This is can be the result of severe undersampling, or "
3825                "(more likely)\n"
3826                "         there is something wrong with the simulations.\n");
3827     }
3828
3829
3830     /* final results in kJ/mol */
3831     printf("\n\nFinal results in kJ/mol:\n\n");
3832     dg_tot  = 0;
3833     for (f = 0; f < nresults; f++)
3834     {
3835
3836         if (fpi != nullptr)
3837         {
3838             lambda_vec_print_short(results[f].a->native_lambda, buf);
3839             fprintf(fpi, xvg2format, buf, dg_tot);
3840         }
3841
3842
3843         if (fpb != nullptr)
3844         {
3845             lambda_vec_print_intermediate(results[f].a->native_lambda,
3846                                           results[f].b->native_lambda,
3847                                           buf);
3848
3849             fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3850         }
3851
3852         printf("point ");
3853         lambda_vec_print_short(results[f].a->native_lambda, buf);
3854         lambda_vec_print_short(results[f].b->native_lambda, buf2);
3855         printf("%s - %s", buf, buf2);
3856         printf(",   DG ");
3857
3858         printf(dgformat, results[f].dg*kT);
3859         if (bEE)
3860         {
3861             printf(" +/- ");
3862             printf(dgformat, results[f].dg_err*kT);
3863         }
3864         if (histrange_err)
3865         {
3866             printf(" (max. range err. = ");
3867             printf(dgformat, results[f].dg_histrange_err*kT);
3868             printf(")");
3869             sum_histrange_err += results[f].dg_histrange_err*kT;
3870         }
3871
3872         printf("\n");
3873         dg_tot += results[f].dg;
3874     }
3875     printf("\n");
3876     printf("total ");
3877     lambda_vec_print_short(results[0].a->native_lambda, buf);
3878     lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3879     printf("%s - %s", buf, buf2);
3880     printf(",   DG ");
3881
3882     printf(dgformat, dg_tot*kT);
3883     if (bEE)
3884     {
3885         stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3886         printf(" +/- ");
3887         printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3888     }
3889     printf("\n");
3890     if (disc_err)
3891     {
3892         printf("\nmaximum discretization error = ");
3893         printf(dgformat, sum_disc_err);
3894         if (bEE && stat_err < sum_disc_err)
3895         {
3896             printf("WARNING: discretization error (%g) is larger than statistical error.\n       Decrease histogram spacing for more accurate results\n", stat_err);
3897         }
3898     }
3899     if (histrange_err)
3900     {
3901         printf("\nmaximum histogram range error = ");
3902         printf(dgformat, sum_histrange_err);
3903         if (bEE && stat_err < sum_histrange_err)
3904         {
3905             printf("WARNING: histogram range error (%g) is larger than statistical error.\n       Increase histogram range for more accurate results\n", stat_err);
3906         }
3907
3908     }
3909     printf("\n");
3910
3911
3912     if (fpi != nullptr)
3913     {
3914         lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3915         fprintf(fpi, xvg2format, buf, dg_tot);
3916         xvgrclose(fpi);
3917     }
3918     if (fpb != nullptr)
3919     {
3920         xvgrclose(fpb);
3921     }
3922
3923     do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3924     do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");
3925
3926     return 0;
3927 }