2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010-2018, The GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/commandline/pargs.h"
48 #include "gromacs/commandline/viewit.h"
49 #include "gromacs/fileio/enxio.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxana/gmx_ana.h"
52 #include "gromacs/math/units.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/mdlib/energyoutput.h"
55 #include "gromacs/mdtypes/md_enums.h"
56 #include "gromacs/trajectory/energyframe.h"
57 #include "gromacs/utility/arraysize.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/dir_separator.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/gmxassert.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/snprintf.h"
66 /* Structure for the names of lambda vector components */
67 typedef struct lambda_components_t
69 char** names; /* Array of strings with names for the lambda vector
71 int N; /* The number of components */
72 int Nalloc; /* The number of allocated components */
73 } lambda_components_t;
75 /* Structure for a lambda vector or a dhdl derivative direction */
76 typedef struct lambda_vec_t
78 double* val; /* The lambda vector component values. Only valid if
80 int dhdl; /* The coordinate index for the derivative described by this
82 const lambda_components_t* lc; /* the associated lambda_components
84 int index; /* The state number (init-lambda-state) of this lambda
85 vector, if known. If not, it is set to -1 */
88 /* the dhdl.xvg data from a simulation */
92 int ftp; /* file type */
93 int nset; /* number of lambdas, including dhdl */
94 int* np; /* number of data points (du or hists) per lambda */
95 int np_alloc; /* number of points (du or hists) allocated */
96 double temp; /* temperature */
97 lambda_vec_t* lambda; /* the lambdas (of first index for y). */
98 double* t; /* the times (of second index for y) */
99 double** y; /* the dU values. y[0] holds the derivative, while
100 further ones contain the energy differences between
101 the native lambda and the 'foreign' lambdas. */
102 lambda_vec_t native_lambda; /* the native lambda */
104 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
108 typedef struct hist_t
110 unsigned int* bin[2]; /* the (forward + reverse) histogram values */
111 double dx[2]; /* the histogram spacing. The reverse
112 dx is the negative of the forward dx.*/
113 int64_t x0[2]; /* the (forward + reverse) histogram start
116 int nbin[2]; /* the (forward+reverse) number of bins */
117 int64_t sum; /* the total number of counts. Must be
118 the same for forward + reverse. */
119 int nhist; /* number of hist datas (forward or reverse) */
121 double start_time, delta_time; /* start time, end time of histogram */
125 /* an aggregate of samples for partial free energy calculation */
126 typedef struct samples_t
128 lambda_vec_t* native_lambda; /* pointer to native lambda vector */
129 lambda_vec_t* foreign_lambda; /* pointer to foreign lambda vector */
130 double temp; /* the temperature */
131 gmx_bool derivative; /* whether this sample is a derivative */
133 /* The samples come either as either delta U lists: */
134 int ndu; /* the number of delta U samples */
135 double* du; /* the delta u's */
136 double* t; /* the times associated with those samples, or: */
137 double start_time, delta_time; /*start time and delta time for linear time*/
139 /* or as histograms: */
140 hist_t* hist; /* a histogram */
142 /* allocation data: (not NULL for data 'owned' by this struct) */
143 double *du_alloc, *t_alloc; /* allocated delta u arrays */
144 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
145 hist_t* hist_alloc; /* allocated hist */
147 int64_t ntot; /* total number of samples */
148 const char* filename; /* the file name this sample comes from */
151 /* a sample range (start to end for du-style data, or boolean
152 for both du-style data and histograms */
153 typedef struct sample_range_t
155 int start, end; /* start and end index for du style data */
156 gmx_bool use; /* whether to use this sample */
158 samples_t* s; /* the samples this range belongs to */
162 /* a collection of samples for a partial free energy calculation
163 (i.e. the collection of samples from one native lambda to one
165 typedef struct sample_coll_t
167 lambda_vec_t* native_lambda; /* these should be the same for all samples
169 lambda_vec_t* foreign_lambda; /* collection */
170 double temp; /* the temperature */
172 int nsamples; /* the number of samples */
173 samples_t** s; /* the samples themselves */
174 sample_range_t* r; /* the sample ranges */
175 int nsamples_alloc; /* number of allocated samples */
177 int64_t ntot; /* total number of samples in the ranges of
180 struct sample_coll_t *next, *prev; /* next and previous in the list */
183 /* all the samples associated with a lambda point */
184 typedef struct lambda_data_t
186 lambda_vec_t* lambda; /* the native lambda (at start time if dynamic) */
187 double temp; /* temperature */
189 sample_coll_t* sc; /* the samples */
191 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
193 struct lambda_data_t *next, *prev; /* the next and prev in the list */
196 /* Top-level data structure of simulation data */
197 typedef struct sim_data_t
199 lambda_data_t* lb; /* a lambda data linked list */
200 lambda_data_t lb_head; /* The head element of the linked list */
202 lambda_components_t lc; /* the allowed components of the lambda
206 /* Top-level data structure with calculated values. */
209 sample_coll_t *a, *b; /* the simulation data */
211 double dg; /* the free energy difference */
212 double dg_err; /* the free energy difference */
214 double dg_disc_err; /* discretization error */
215 double dg_histrange_err; /* histogram range error */
217 double sa; /* relative entropy of b in state a */
218 double sa_err; /* error in sa */
219 double sb; /* relative entropy of a in state b */
220 double sb_err; /* error in sb */
222 double dg_stddev; /* expected dg stddev per sample */
223 double dg_stddev_err; /* error in dg_stddev */
227 /* Initialize a lambda_components structure */
228 static void lambda_components_init(lambda_components_t* lc)
232 snew(lc->names, lc->Nalloc);
235 /* Add a component to a lambda_components structure */
236 static void lambda_components_add(lambda_components_t* lc, const char* name, size_t name_length)
238 while (lc->N + 1 > lc->Nalloc)
240 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2 * lc->Nalloc;
241 srenew(lc->names, lc->Nalloc);
243 snew(lc->names[lc->N], name_length + 1);
244 std::strncpy(lc->names[lc->N], name, name_length);
248 /* check whether a component with index 'index' matches the given name, or
249 is also NULL. Returns TRUE if this is the case.
250 the string name does not need to end */
251 static gmx_bool lambda_components_check(const lambda_components_t* lc, int index, const char* name, size_t name_length)
254 if (!lc || index >= lc->N)
258 if ((name == nullptr) && (lc->names[index] == nullptr))
262 if (((name != nullptr) && (lc->names[index] == nullptr))
263 || ((name == nullptr) && (lc->names[index] != nullptr)))
268 (name != nullptr) || (name_length == 0),
269 "If name is empty, the length of the substring to examine within it must be zero");
270 len = std::strlen(lc->names[index]);
271 if (len != name_length)
275 if (name_length == 0)
277 // Everything matches a zero-length substring. This branch is
278 // needed because name could in principle be nullptr.
281 return std::strncmp(lc->names[index], name, name_length) == 0;
284 /* Find the index of a given lambda component name, or -1 if not found */
285 static int lambda_components_find(const lambda_components_t* lc, const char* name, size_t name_length)
289 for (i = 0; i < lc->N; i++)
291 if (std::strncmp(lc->names[i], name, name_length) == 0)
300 /* initialize a lambda vector */
301 static void lambda_vec_init(lambda_vec_t* lv, const lambda_components_t* lc)
303 snew(lv->val, lc->N);
309 static void lambda_vec_copy(lambda_vec_t* lv, const lambda_vec_t* orig)
313 lambda_vec_init(lv, orig->lc);
314 lv->dhdl = orig->dhdl;
315 lv->index = orig->index;
316 for (i = 0; i < lv->lc->N; i++)
318 lv->val[i] = orig->val[i];
322 /* write a lambda vec to a preallocated string */
323 static void lambda_vec_print(const lambda_vec_t* lv, char* str, gmx_bool named)
327 str[0] = 0; /* reset the string */
332 str += sprintf(str, "delta H to ");
336 str += sprintf(str, "(");
338 for (i = 0; i < lv->lc->N; i++)
340 str += sprintf(str, "%g", lv->val[i]);
341 if (i < lv->lc->N - 1)
343 str += sprintf(str, ", ");
353 /* this lambda vector describes a derivative */
354 str += sprintf(str, "dH/dl");
355 if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
357 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
362 /* write a shortened version of the lambda vec to a preallocated string */
363 static void lambda_vec_print_short(const lambda_vec_t* lv, char* str)
367 sprintf(str, "%6d", lv->index);
373 sprintf(str, "%6.3f", lv->val[0]);
377 sprintf(str, "dH/dl[%d]", lv->dhdl);
382 /* write an intermediate version of two lambda vecs to a preallocated string */
383 static void lambda_vec_print_intermediate(const lambda_vec_t* a, const lambda_vec_t* b, char* str)
386 if ((a->index >= 0) && (b->index >= 0))
388 sprintf(str, "%6.3f", (a->index + b->index) / 2.0);
392 if ((a->dhdl < 0) && (b->dhdl < 0))
394 sprintf(str, "%6.3f", (a->val[0] + b->val[0]) / 2.0);
400 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
401 a and b must describe non-derivative lambda points */
402 static double lambda_vec_abs_diff(const lambda_vec_t* a, const lambda_vec_t* b)
407 if ((a->dhdl > 0) || (b->dhdl > 0))
411 "Trying to calculate the difference between derivatives instead of lambda points");
415 gmx_fatal(FARGS, "Trying to calculate the difference lambdas with differing basis set");
417 for (i = 0; i < a->lc->N; i++)
419 double df = a->val[i] - b->val[i];
422 return std::sqrt(ret);
426 /* check whether two lambda vectors are the same */
427 static gmx_bool lambda_vec_same(const lambda_vec_t* a, const lambda_vec_t* b)
437 for (i = 0; i < a->lc->N; i++)
439 if (!gmx_within_tol(a->val[i], b->val[i], 10 * GMX_REAL_EPS))
448 /* they're derivatives, so we check whether the indices match */
449 return (a->dhdl == b->dhdl);
453 /* Compare the sort order of two foreign lambda vectors
455 returns 1 if a is 'bigger' than b,
456 returns 0 if they're the same,
457 returns -1 if a is 'smaller' than b.*/
458 static int lambda_vec_cmp_foreign(const lambda_vec_t* a, const lambda_vec_t* b)
461 double norm_a = 0, norm_b = 0;
462 gmx_bool different = FALSE;
466 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
468 /* if either one has an index we sort based on that */
469 if ((a->index >= 0) || (b->index >= 0))
471 if (a->index == b->index)
475 return (a->index > b->index) ? 1 : -1;
477 if (a->dhdl >= 0 || b->dhdl >= 0)
479 /* lambda vectors that are derivatives always sort higher than those
480 without derivatives */
481 if ((a->dhdl >= 0) != (b->dhdl >= 0))
483 return (a->dhdl >= 0) ? 1 : -1;
488 /* neither has an index, so we can only sort on the lambda components,
489 which is only valid if there is one component */
490 for (i = 0; i < a->lc->N; i++)
492 if (!gmx_within_tol(a->val[i], b->val[i], 10 * GMX_REAL_EPS))
496 norm_a += a->val[i] * a->val[i];
497 norm_b += b->val[i] * b->val[i];
503 return (norm_a > norm_b) ? 1 : -1;
506 /* Compare the sort order of two native lambda vectors
508 returns 1 if a is 'bigger' than b,
509 returns 0 if they're the same,
510 returns -1 if a is 'smaller' than b.*/
511 static int lambda_vec_cmp_native(const lambda_vec_t* a, const lambda_vec_t* b)
515 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
517 /* if either one has an index we sort based on that */
518 if ((a->index >= 0) || (b->index >= 0))
520 if (a->index == b->index)
524 return (a->index > b->index) ? 1 : -1;
526 /* neither has an index, so we can only sort on the lambda components,
527 which is only valid if there is one component */
530 gmx_fatal(FARGS, "Can't compare lambdas with no index and > 1 component");
532 if (a->dhdl >= 0 || b->dhdl >= 0)
534 gmx_fatal(FARGS, "Can't compare native lambdas that are derivatives");
536 if (gmx_within_tol(a->val[0], b->val[0], 10 * GMX_REAL_EPS))
540 return a->val[0] > b->val[0] ? 1 : -1;
544 static void hist_init(hist_t* h, int nhist, int* nbin)
549 gmx_fatal(FARGS, "histogram with more than two sets of data!");
551 for (i = 0; i < nhist; i++)
553 snew(h->bin[i], nbin[i]);
555 h->nbin[i] = nbin[i];
556 h->start_time = h->delta_time = 0;
563 static void xvg_init(xvg_t* ba)
565 ba->filename = nullptr;
572 static void samples_init(samples_t* s,
573 lambda_vec_t* native_lambda,
574 lambda_vec_t* foreign_lambda,
577 const char* filename)
579 s->native_lambda = native_lambda;
580 s->foreign_lambda = foreign_lambda;
582 s->derivative = derivative;
587 s->start_time = s->delta_time = 0;
589 s->du_alloc = nullptr;
590 s->t_alloc = nullptr;
591 s->hist_alloc = nullptr;
596 s->filename = filename;
599 static void sample_range_init(sample_range_t* r, samples_t* s)
607 static void sample_coll_init(sample_coll_t* sc, lambda_vec_t* native_lambda, lambda_vec_t* foreign_lambda, double temp)
609 sc->native_lambda = native_lambda;
610 sc->foreign_lambda = foreign_lambda;
616 sc->nsamples_alloc = 0;
619 sc->next = sc->prev = nullptr;
622 static void sample_coll_destroy(sample_coll_t* sc)
624 /* don't free the samples themselves */
630 static void lambda_data_init(lambda_data_t* l, lambda_vec_t* native_lambda, double temp)
632 l->lambda = native_lambda;
638 l->sc = &(l->sc_head);
640 sample_coll_init(l->sc, native_lambda, nullptr, 0.);
645 static void barres_init(barres_t* br)
654 br->dg_stddev_err = 0;
661 /* calculate the total number of samples in a sample collection */
662 static void sample_coll_calc_ntot(sample_coll_t* sc)
667 for (i = 0; i < sc->nsamples; i++)
673 sc->ntot += sc->s[i]->ntot;
677 sc->ntot += sc->r[i].end - sc->r[i].start;
684 /* find the barsamples_t associated with a lambda that corresponds to
685 a specific foreign lambda */
686 static sample_coll_t* lambda_data_find_sample_coll(lambda_data_t* l, lambda_vec_t* foreign_lambda)
688 sample_coll_t* sc = l->sc->next;
692 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
702 /* insert li into an ordered list of lambda_colls */
703 static void lambda_data_insert_sample_coll(lambda_data_t* l, sample_coll_t* sc)
705 sample_coll_t* scn = l->sc->next;
706 while ((scn != l->sc))
708 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
714 /* now insert it before the found scn */
716 sc->prev = scn->prev;
717 scn->prev->next = sc;
721 /* insert li into an ordered list of lambdas */
722 static void lambda_data_insert_lambda(lambda_data_t* head, lambda_data_t* li)
724 lambda_data_t* lc = head->next;
727 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
733 /* now insert ourselves before the found lc */
740 /* insert a sample and a sample_range into a sample_coll. The
741 samples are stored as a pointer, the range is copied. */
742 static void sample_coll_insert_sample(sample_coll_t* sc, samples_t* s, sample_range_t* r)
744 /* first check if it belongs here */
745 GMX_ASSERT(sc->next->s, "Next not properly initialized!");
746 if (sc->temp != s->temp)
749 "Temperatures in files %s and %s are not the same!",
751 sc->next->s[0]->filename);
753 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
756 "Native lambda in files %s and %s are not the same (and they should be)!",
758 sc->next->s[0]->filename);
760 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
763 "Foreign lambda in files %s and %s are not the same (and they should be)!",
765 sc->next->s[0]->filename);
768 /* check if there's room */
769 if ((sc->nsamples + 1) > sc->nsamples_alloc)
771 sc->nsamples_alloc = std::max(2 * sc->nsamples_alloc, 2);
772 srenew(sc->s, sc->nsamples_alloc);
773 srenew(sc->r, sc->nsamples_alloc);
775 sc->s[sc->nsamples] = s;
776 sc->r[sc->nsamples] = *r;
779 sample_coll_calc_ntot(sc);
782 /* insert a sample into a lambda_list, creating the right sample_coll if
784 static void lambda_data_list_insert_sample(lambda_data_t* head, samples_t* s)
786 gmx_bool found = FALSE;
790 lambda_data_t* l = head->next;
792 /* first search for the right lambda_data_t */
795 if (lambda_vec_same(l->lambda, s->native_lambda))
805 snew(l, 1); /* allocate a new one */
806 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
807 lambda_data_insert_lambda(head, l); /* add it to the list */
810 /* now look for a sample collection */
811 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
814 snew(sc, 1); /* allocate a new one */
815 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
816 lambda_data_insert_sample_coll(l, sc);
819 /* now insert the samples into the sample coll */
820 sample_range_init(&r, s);
821 sample_coll_insert_sample(sc, s, &r);
825 /* make a histogram out of a sample collection */
826 static void sample_coll_make_hist(sample_coll_t* sc, std::vector<int>* bin, double* dx, double* xmin, int nbin_default)
829 gmx_bool dx_set = FALSE;
830 gmx_bool xmin_set = FALSE;
832 gmx_bool xmax_set = FALSE;
833 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
834 limits of a histogram */
837 /* first determine dx and xmin; try the histograms */
838 for (i = 0; i < sc->nsamples; i++)
842 hist_t* hist = sc->s[i]->hist;
843 for (k = 0; k < hist->nhist; k++)
845 double hdx = hist->dx[k];
846 double xmax_now = (hist->x0[k] + hist->nbin[k]) * hdx;
848 /* we use the biggest dx*/
849 if ((!dx_set) || hist->dx[0] > *dx)
854 if ((!xmin_set) || (hist->x0[k] * hdx) < *xmin)
857 *xmin = (hist->x0[k] * hdx);
860 if ((!xmax_set) || (xmax_now > xmax && !xmax_set_hard))
864 if (hist->bin[k][hist->nbin[k] - 1] != 0)
866 xmax_set_hard = TRUE;
869 if (hist->bin[k][hist->nbin[k] - 1] != 0 && (xmax_now < xmax))
871 xmax_set_hard = TRUE;
877 /* and the delta us */
878 for (i = 0; i < sc->nsamples; i++)
880 if (sc->s[i]->ndu > 0)
882 /* determine min and max */
883 int starti = sc->r[i].start;
884 int endi = sc->r[i].end;
885 double du_xmin = sc->s[i]->du[starti];
886 double du_xmax = sc->s[i]->du[starti];
887 for (j = starti + 1; j < endi; j++)
889 if (sc->s[i]->du[j] < du_xmin)
891 du_xmin = sc->s[i]->du[j];
893 if (sc->s[i]->du[j] > du_xmax)
895 du_xmax = sc->s[i]->du[j];
899 /* and now change the limits */
900 if ((!xmin_set) || (du_xmin < *xmin))
905 if ((!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard))
913 if (!xmax_set || !xmin_set)
922 bin->resize(nbin_default);
923 *dx = (xmax - (*xmin)) / ((bin->size()) - 2); /* -2 because we want the last bin to
924 be 0, and we count from 0 */
928 bin->resize(static_cast<int>((xmax - (*xmin)) / (*dx)));
931 /* reset the histogram */
932 std::fill(bin->begin(), bin->end(), 0);
934 /* now add the actual data */
935 for (i = 0; i < sc->nsamples; i++)
939 hist_t* hist = sc->s[i]->hist;
940 for (k = 0; k < hist->nhist; k++)
942 double hdx = hist->dx[k];
943 double xmin_hist = hist->x0[k] * hdx;
944 for (j = 0; j < hist->nbin[k]; j++)
946 /* calculate the bin corresponding to the middle of the
948 double x = hdx * (j + 0.5) + xmin_hist;
949 int binnr = static_cast<int>((x - (*xmin)) / (*dx));
951 if (binnr >= gmx::ssize(*bin) || binnr < 0)
953 binnr = (bin->size()) - 1;
956 (*bin)[binnr] += hist->bin[k][j];
962 int starti = sc->r[i].start;
963 int endi = sc->r[i].end;
964 for (j = starti; j < endi; j++)
966 int binnr = static_cast<int>((sc->s[i]->du[j] - (*xmin)) / (*dx));
967 if (binnr >= gmx::ssize(*bin) || binnr < 0)
969 binnr = (bin->size()) - 1;
978 /* write a collection of histograms to a file */
979 static void sim_data_histogram(sim_data_t* sd, const char* filename, int nbin_default, const gmx_output_env_t* oenv)
981 char label_x[STRLEN];
982 const char * dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
983 const char* title = "N(\\DeltaH)";
984 const char* label_y = "Samples";
988 char** setnames = nullptr;
989 gmx_bool first_set = FALSE;
990 /* histogram data: */
991 std::vector<int> hist;
994 lambda_data_t* bl_head = sd->lb;
996 printf("\nWriting histogram to %s\n", filename);
997 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
999 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1001 /* first get all the set names */
1003 /* iterate over all lambdas */
1004 while (bl != bl_head)
1006 sample_coll_t* sc = bl->sc->next;
1008 /* iterate over all samples */
1009 while (sc != bl->sc)
1011 char buf[STRLEN], buf2[STRLEN];
1014 srenew(setnames, nsets);
1015 snew(setnames[nsets - 1], STRLEN);
1016 if (sc->foreign_lambda->dhdl < 0)
1018 lambda_vec_print(sc->native_lambda, buf, FALSE);
1019 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1020 sprintf(setnames[nsets - 1], "N(%s(%s=%s) | %s=%s)", deltag, lambda, buf2, lambda, buf);
1024 lambda_vec_print(sc->native_lambda, buf, FALSE);
1025 sprintf(setnames[nsets - 1], "N(%s | %s=%s)", dhdl, lambda, buf);
1032 xvgr_legend(fp, nsets, setnames, oenv);
1035 /* now make the histograms */
1037 /* iterate over all lambdas */
1038 while (bl != bl_head)
1040 sample_coll_t* sc = bl->sc->next;
1042 /* iterate over all samples */
1043 while (sc != bl->sc)
1047 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1050 sample_coll_make_hist(sc, &hist, &dx, &minval, nbin_default);
1052 for (gmx::index i = 0; i < gmx::ssize(hist); i++)
1054 double xmin = i * dx + minval;
1055 double xmax = (i + 1) * dx + minval;
1057 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1070 static int snprint_lambda_vec(char* str, int sz, const char* label, lambda_vec_t* lambda)
1074 n += snprintf(str + n, sz - n, "lambda vector [%s]: ", label);
1075 if (lambda->index >= 0)
1077 n += snprintf(str + n, sz - n, " init-lambda-state=%d", lambda->index);
1079 if (lambda->dhdl >= 0)
1081 n += snprintf(str + n, sz - n, " dhdl index=%d", lambda->dhdl);
1086 for (i = 0; i < lambda->lc->N; i++)
1088 n += snprintf(str + n, sz - n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1094 /* create a collection (array) of barres_t object given a ordered linked list
1095 of barlamda_t sample collections */
1096 static barres_t* barres_list_create(sim_data_t* sd, int* nres, gmx_bool use_dhdl)
1101 gmx_bool dhdl = FALSE;
1102 gmx_bool first = TRUE;
1103 lambda_data_t* bl_head = sd->lb;
1105 /* first count the lambdas */
1107 while (bl != bl_head)
1112 snew(res, nlambda - 1);
1114 /* next put the right samples in the res */
1116 bl = bl_head->next->next; /* we start with the second one. */
1117 while (bl != bl_head)
1119 sample_coll_t *sc, *scprev;
1120 barres_t* br = &(res[*nres]);
1121 /* there is always a previous one. we search for that as a foreign
1123 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1124 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1132 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1133 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1137 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H "
1138 "values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1144 "Some dhdl files contain only one value (dH/dl), while others \ncontain "
1145 "multiple values (dH/dl and/or Delta H), will not proceed \nbecause of "
1146 "possible inconsistencies.\n");
1149 else if (!scprev && !sc)
1151 char descX[STRLEN], descY[STRLEN];
1152 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1153 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1156 "There is no path between the states X & Y below that is covered by foreign "
1157 "lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl "
1158 "by calculating the averages of dH/dl\nwith g_analyze and integrating "
1159 "them.\nAlternatively, use the -extp option if (and only if) the "
1160 "Hamiltonian\ndepends linearly on lambda, which is NOT normally the "
1161 "case.\n\n%s\n%s\n",
1166 /* normal delta H */
1169 char descX[STRLEN], descY[STRLEN];
1170 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1171 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1173 "Could not find a set for foreign lambda (state X below)\nin the files for "
1174 "main lambda (state Y below)\n\n%s\n%s\n",
1180 char descX[STRLEN], descY[STRLEN];
1181 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1182 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1184 "Could not find a set for foreign lambda (state X below)\nin the files for "
1185 "main lambda (state Y below)\n\n%s\n%s\n",
1199 /* estimate the maximum discretization error */
1200 static double barres_list_max_disc_err(barres_t* res, int nres)
1203 double disc_err = 0.;
1204 double delta_lambda;
1206 for (i = 0; i < nres; i++)
1208 barres_t* br = &(res[i]);
1210 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda, br->a->native_lambda);
1212 for (j = 0; j < br->a->nsamples; j++)
1214 if (br->a->s[j]->hist)
1217 if (br->a->s[j]->derivative)
1219 Wfac = delta_lambda;
1222 disc_err = std::max(disc_err, Wfac * br->a->s[j]->hist->dx[0]);
1225 for (j = 0; j < br->b->nsamples; j++)
1227 if (br->b->s[j]->hist)
1230 if (br->b->s[j]->derivative)
1232 Wfac = delta_lambda;
1234 disc_err = std::max(disc_err, Wfac * br->b->s[j]->hist->dx[0]);
1242 /* impose start and end times on a sample collection, updating sample_ranges */
1243 static void sample_coll_impose_times(sample_coll_t* sc, double begin_t, double end_t)
1246 for (i = 0; i < sc->nsamples; i++)
1248 samples_t* s = sc->s[i];
1249 sample_range_t* r = &(sc->r[i]);
1252 double end_time = s->hist->delta_time * s->hist->sum + s->hist->start_time;
1253 if (s->hist->start_time < begin_t || end_time > end_t)
1263 if (s->start_time < begin_t)
1265 r->start = static_cast<int>((begin_t - s->start_time) / s->delta_time);
1267 end_time = s->delta_time * s->ndu + s->start_time;
1268 if (end_time > end_t)
1270 r->end = static_cast<int>((end_t - s->start_time) / s->delta_time);
1276 for (j = 0; j < s->ndu; j++)
1278 if (s->t[j] < begin_t)
1283 if (s->t[j] >= end_t)
1290 if (r->start > r->end)
1296 sample_coll_calc_ntot(sc);
1299 static void sim_data_impose_times(sim_data_t* sd, double begin, double end)
1301 double first_t, last_t;
1302 double begin_t, end_t;
1304 lambda_data_t* head = sd->lb;
1307 if (begin <= 0 && end < 0)
1312 /* first determine the global start and end times */
1318 sample_coll_t* sc = lc->sc->next;
1319 while (sc != lc->sc)
1321 for (j = 0; j < sc->nsamples; j++)
1323 double start_t, end_t;
1325 start_t = sc->s[j]->start_time;
1326 end_t = sc->s[j]->start_time;
1329 end_t += sc->s[j]->delta_time * sc->s[j]->hist->sum;
1335 end_t = sc->s[j]->t[sc->s[j]->ndu - 1];
1339 end_t += sc->s[j]->delta_time * sc->s[j]->ndu;
1343 if (start_t < first_t || first_t < 0)
1357 /* calculate the actual times */
1375 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1377 if (begin_t > end_t)
1381 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1383 /* then impose them */
1387 sample_coll_t* sc = lc->sc->next;
1388 while (sc != lc->sc)
1390 sample_coll_impose_times(sc, begin_t, end_t);
1398 /* create subsample i out of ni from an existing sample_coll */
1399 static gmx_bool sample_coll_create_subsample(sample_coll_t* sc, sample_coll_t* sc_orig, int i, int ni)
1405 int64_t ntot_so_far;
1407 *sc = *sc_orig; /* just copy all fields */
1409 /* allocate proprietary memory */
1410 snew(sc->s, sc_orig->nsamples);
1411 snew(sc->r, sc_orig->nsamples);
1413 /* copy the samples */
1414 for (j = 0; j < sc_orig->nsamples; j++)
1416 sc->s[j] = sc_orig->s[j];
1417 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1420 /* now fix start and end fields */
1421 /* the casts avoid possible overflows */
1422 ntot_start = static_cast<int64_t>(sc_orig->ntot * static_cast<double>(i) / static_cast<double>(ni));
1423 ntot_end = static_cast<int64_t>(sc_orig->ntot * static_cast<double>(i + 1) / static_cast<double>(ni));
1425 for (j = 0; j < sc->nsamples; j++)
1428 int64_t new_start, new_end;
1434 ntot_add = sc->s[j]->hist->sum;
1438 ntot_add = sc->r[j].end - sc->r[j].start;
1446 if (!sc->s[j]->hist)
1448 if (ntot_so_far < ntot_start)
1450 /* adjust starting point */
1451 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1455 new_start = sc->r[j].start;
1457 /* adjust end point */
1458 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1459 if (new_end > sc->r[j].end)
1461 new_end = sc->r[j].end;
1464 /* check if we're in range at all */
1465 if ((new_end < new_start) || (new_start > sc->r[j].end))
1470 /* and write the new range */
1471 GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(),
1472 "Value of 'new_start' too large for int converstion");
1473 GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(),
1474 "Value of 'new_end' too large for int converstion");
1475 sc->r[j].start = static_cast<int>(new_start);
1476 sc->r[j].end = static_cast<int>(new_end);
1483 double ntot_start_norm, ntot_end_norm;
1484 /* calculate the amount of overlap of the
1485 desired range (ntot_start -- ntot_end) onto
1486 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1488 /* first calculate normalized bounds
1489 (where 0 is the start of the hist range, and 1 the end) */
1490 ntot_start_norm = (ntot_start - ntot_so_far) / static_cast<double>(ntot_add);
1491 ntot_end_norm = (ntot_end - ntot_so_far) / static_cast<double>(ntot_add);
1493 /* now fix the boundaries */
1494 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1495 ntot_end_norm = std::max(0.0, std::min(1.0, ntot_end_norm));
1497 /* and calculate the overlap */
1498 overlap = ntot_end_norm - ntot_start_norm;
1500 if (overlap > 0.95) /* we allow for 5% slack */
1502 sc->r[j].use = TRUE;
1504 else if (overlap < 0.05)
1506 sc->r[j].use = FALSE;
1514 ntot_so_far += ntot_add;
1516 sample_coll_calc_ntot(sc);
1521 /* calculate minimum and maximum work values in sample collection */
1522 static void sample_coll_min_max(sample_coll_t* sc, double Wfac, double* Wmin, double* Wmax)
1526 *Wmin = std::numeric_limits<float>::max();
1527 *Wmax = -std::numeric_limits<float>::max();
1529 for (i = 0; i < sc->nsamples; i++)
1531 samples_t* s = sc->s[i];
1532 sample_range_t* r = &(sc->r[i]);
1537 for (j = r->start; j < r->end; j++)
1539 *Wmin = std::min(*Wmin, s->du[j] * Wfac);
1540 *Wmax = std::max(*Wmax, s->du[j] * Wfac);
1545 int hd = 0; /* determine the histogram direction: */
1547 if ((s->hist->nhist > 1) && (Wfac < 0))
1551 dx = s->hist->dx[hd];
1553 for (j = s->hist->nbin[hd] - 1; j >= 0; j--)
1555 *Wmin = std::min(*Wmin, Wfac * (s->hist->x0[hd]) * dx);
1556 *Wmax = std::max(*Wmax, Wfac * (s->hist->x0[hd]) * dx);
1557 /* look for the highest value bin with values */
1558 if (s->hist->bin[hd][j] > 0)
1560 *Wmin = std::min(*Wmin, Wfac * (j + s->hist->x0[hd] + 1) * dx);
1561 *Wmax = std::max(*Wmax, Wfac * (j + s->hist->x0[hd] + 1) * dx);
1570 /* Initialize a sim_data structure */
1571 static void sim_data_init(sim_data_t* sd)
1573 /* make linked list */
1574 sd->lb = &(sd->lb_head);
1575 sd->lb->next = sd->lb;
1576 sd->lb->prev = sd->lb;
1578 lambda_components_init(&(sd->lc));
1582 static double calc_bar_sum(int n, const double* W, double Wfac, double sbMmDG)
1589 for (i = 0; i < n; i++)
1591 sum += 1. / (1. + std::exp(Wfac * W[i] + sbMmDG));
1597 /* calculate the BAR average given a histogram
1599 if type== 0, calculate the best estimate for the average,
1600 if type==-1, calculate the minimum possible value given the histogram
1601 if type== 1, calculate the maximum possible value given the histogram */
1602 static double calc_bar_sum_hist(const hist_t* hist, double Wfac, double sbMmDG, int type)
1607 /* normalization factor multiplied with bin width and
1608 number of samples (we normalize through M): */
1610 int hd = 0; /* determine the histogram direction: */
1613 if ((hist->nhist > 1) && (Wfac < 0))
1618 maxbin = hist->nbin[hd] - 1;
1621 maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1624 for (i = 0; i < maxbin; i++)
1626 double x = Wfac * ((i + hist->x0[hd]) + 0.5) * dx; /* bin middle */
1627 double pxdx = hist->bin[0][i] * normdx; /* p(x)dx */
1629 sum += pxdx / (1. + std::exp(x + sbMmDG));
1635 static double calc_bar_lowlevel(sample_coll_t* ca, sample_coll_t* cb, double temp, double tol, int type)
1639 double Wfac1, Wfac2, Wmin, Wmax;
1640 double DG0, DG1, DG2, dDG1;
1641 double n1, n2; /* numbers of samples as doubles */
1646 /* count the numbers of samples */
1650 M = std::log(n1 / n2);
1652 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1653 if (ca->foreign_lambda->dhdl < 0)
1655 /* this is the case when the delta U were calculated directly
1656 (i.e. we're not scaling dhdl) */
1662 /* we're using dhdl, so delta_lambda needs to be a
1663 multiplication factor. */
1664 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1665 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda, ca->native_lambda);
1666 if (cb->native_lambda->lc->N > 1)
1668 gmx_fatal(FARGS, "Can't (yet) do multi-component dhdl interpolation");
1671 Wfac1 = beta * delta_lambda;
1672 Wfac2 = -beta * delta_lambda;
1677 /* We print the output both in kT and kJ/mol.
1678 * Here we determine DG in kT, so when beta < 1
1679 * the precision has to be increased.
1684 /* Calculate minimum and maximum work to give an initial estimate of
1685 * delta G as their average.
1688 double Wmin1, Wmin2, Wmax1, Wmax2;
1689 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1690 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1692 Wmin = std::min(Wmin1, Wmin2);
1693 Wmax = std::max(Wmax1, Wmax2);
1701 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1703 /* We approximate by bisection: given our initial estimates
1704 we keep checking whether the halfway point is greater or
1705 smaller than what we get out of the BAR averages.
1707 For the comparison we can use twice the tolerance. */
1708 while (DG2 - DG0 > 2 * tol)
1710 DG1 = 0.5 * (DG0 + DG2);
1712 /* calculate the BAR averages */
1715 for (i = 0; i < ca->nsamples; i++)
1717 samples_t* s = ca->s[i];
1718 sample_range_t* r = &(ca->r[i]);
1723 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M - DG1), type);
1727 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start, Wfac1, (M - DG1));
1731 for (i = 0; i < cb->nsamples; i++)
1733 samples_t* s = cb->s[i];
1734 sample_range_t* r = &(cb->r[i]);
1739 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M - DG1), type);
1743 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start, Wfac2, -(M - DG1));
1758 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1762 return 0.5 * (DG0 + DG2);
1765 static void calc_rel_entropy(sample_coll_t* ca, sample_coll_t* cb, double temp, double dg, double* sa, double* sb)
1771 double Wfac1, Wfac2;
1777 /* count the numbers of samples */
1781 /* to ensure the work values are the same as during the delta_G */
1782 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1783 if (ca->foreign_lambda->dhdl < 0)
1785 /* this is the case when the delta U were calculated directly
1786 (i.e. we're not scaling dhdl) */
1792 /* we're using dhdl, so delta_lambda needs to be a
1793 multiplication factor. */
1794 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda, ca->native_lambda);
1795 Wfac1 = beta * delta_lambda;
1796 Wfac2 = -beta * delta_lambda;
1799 /* first calculate the average work in both directions */
1800 for (i = 0; i < ca->nsamples; i++)
1802 samples_t* s = ca->s[i];
1803 sample_range_t* r = &(ca->r[i]);
1808 for (j = r->start; j < r->end; j++)
1810 W_ab += Wfac1 * s->du[j];
1815 /* normalization factor multiplied with bin width and
1816 number of samples (we normalize through M): */
1819 int hd = 0; /* histogram direction */
1820 if ((s->hist->nhist > 1) && (Wfac1 < 0))
1824 dx = s->hist->dx[hd];
1826 for (j = 0; j < s->hist->nbin[0]; j++)
1828 double x = Wfac1 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1829 double pxdx = s->hist->bin[0][j] * normdx; /* p(x)dx */
1837 for (i = 0; i < cb->nsamples; i++)
1839 samples_t* s = cb->s[i];
1840 sample_range_t* r = &(cb->r[i]);
1845 for (j = r->start; j < r->end; j++)
1847 W_ba += Wfac1 * s->du[j];
1852 /* normalization factor multiplied with bin width and
1853 number of samples (we normalize through M): */
1856 int hd = 0; /* histogram direction */
1857 if ((s->hist->nhist > 1) && (Wfac2 < 0))
1861 dx = s->hist->dx[hd];
1863 for (j = 0; j < s->hist->nbin[0]; j++)
1865 double x = Wfac1 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1866 double pxdx = s->hist->bin[0][j] * normdx; /* p(x)dx */
1874 /* then calculate the relative entropies */
1879 static void calc_dg_stddev(sample_coll_t* ca, sample_coll_t* cb, double temp, double dg, double* stddev)
1883 double sigmafact = 0.;
1885 double Wfac1, Wfac2;
1891 /* count the numbers of samples */
1895 /* to ensure the work values are the same as during the delta_G */
1896 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1897 if (ca->foreign_lambda->dhdl < 0)
1899 /* this is the case when the delta U were calculated directly
1900 (i.e. we're not scaling dhdl) */
1906 /* we're using dhdl, so delta_lambda needs to be a
1907 multiplication factor. */
1908 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda, ca->native_lambda);
1909 Wfac1 = beta * delta_lambda;
1910 Wfac2 = -beta * delta_lambda;
1913 M = std::log(n1 / n2);
1916 /* calculate average in both directions */
1917 for (i = 0; i < ca->nsamples; i++)
1919 samples_t* s = ca->s[i];
1920 sample_range_t* r = &(ca->r[i]);
1925 for (j = r->start; j < r->end; j++)
1927 sigmafact += 1. / (2. + 2. * std::cosh((M + Wfac1 * s->du[j] - dg)));
1932 /* normalization factor multiplied with bin width and
1933 number of samples (we normalize through M): */
1936 int hd = 0; /* histogram direction */
1937 if ((s->hist->nhist > 1) && (Wfac1 < 0))
1941 dx = s->hist->dx[hd];
1943 for (j = 0; j < s->hist->nbin[0]; j++)
1945 double x = Wfac1 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1946 double pxdx = s->hist->bin[0][j] * normdx; /* p(x)dx */
1948 sigmafact += pxdx / (2. + 2. * std::cosh((M + x - dg)));
1953 for (i = 0; i < cb->nsamples; i++)
1955 samples_t* s = cb->s[i];
1956 sample_range_t* r = &(cb->r[i]);
1961 for (j = r->start; j < r->end; j++)
1963 sigmafact += 1. / (2. + 2. * std::cosh((M - Wfac2 * s->du[j] - dg)));
1968 /* normalization factor multiplied with bin width and
1969 number of samples (we normalize through M): */
1972 int hd = 0; /* histogram direction */
1973 if ((s->hist->nhist > 1) && (Wfac2 < 0))
1977 dx = s->hist->dx[hd];
1979 for (j = 0; j < s->hist->nbin[0]; j++)
1981 double x = Wfac2 * ((j + s->hist->x0[0]) + 0.5) * dx; /*bin ctr*/
1982 double pxdx = s->hist->bin[0][j] * normdx; /* p(x)dx */
1984 sigmafact += pxdx / (2. + 2. * std::cosh((M - x - dg)));
1990 sigmafact /= (n1 + n2);
1994 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1995 *stddev = std::sqrt(((1.0 / sigmafact) - ((n1 + n2) / n1 + (n1 + n2) / n2)));
1999 static void calc_bar(barres_t* br, double tol, int npee_min, int npee_max, gmx_bool* bEE, double* partsum)
2002 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2003 for calculated quantities */
2004 double temp = br->a->temp;
2006 double dg_min, dg_max;
2007 gmx_bool have_hist = FALSE;
2009 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2011 br->dg_disc_err = 0.;
2012 br->dg_histrange_err = 0.;
2014 /* check if there are histograms */
2015 for (i = 0; i < br->a->nsamples; i++)
2017 if (br->a->r[i].use && br->a->s[i]->hist)
2025 for (i = 0; i < br->b->nsamples; i++)
2027 if (br->b->r[i].use && br->b->s[i]->hist)
2035 /* calculate histogram-specific errors */
2038 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2039 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2041 if (std::abs(dg_max - dg_min) > GMX_REAL_EPS * 10)
2043 /* the histogram range error is the biggest of the differences
2044 between the best estimate and the extremes */
2045 br->dg_histrange_err = std::abs(dg_max - dg_min);
2047 br->dg_disc_err = 0.;
2048 for (i = 0; i < br->a->nsamples; i++)
2050 if (br->a->s[i]->hist)
2052 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2055 for (i = 0; i < br->b->nsamples; i++)
2057 if (br->b->s[i]->hist)
2059 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2063 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2065 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev));
2074 sample_coll_t ca, cb;
2076 /* initialize the samples */
2077 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda, br->a->temp);
2078 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda, br->b->temp);
2080 for (npee = npee_min; npee <= npee_max; npee++)
2089 double dstddev2 = 0;
2092 for (p = 0; p < npee; p++)
2099 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2100 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2104 printf("WARNING: histogram number incompatible with block number for "
2105 "averaging: can't do error estimate\n");
2109 sample_coll_destroy(&ca);
2113 sample_coll_destroy(&cb);
2118 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2122 partsum[npee * (npee_max + 1) + p] += dgp;
2124 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2129 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc);
2132 dstddev2 += stddevc * stddevc;
2134 sample_coll_destroy(&ca);
2135 sample_coll_destroy(&cb);
2139 dg_sig2 += (dgs2 - dgs * dgs) / (npee - 1);
2145 sa_sig2 += (dsa2 - dsa * dsa) / (npee - 1);
2146 sb_sig2 += (dsb2 - dsb * dsb) / (npee - 1);
2150 stddev_sig2 += (dstddev2 - dstddev * dstddev) / (npee - 1);
2152 br->dg_err = std::sqrt(dg_sig2 / (npee_max - npee_min + 1));
2153 br->sa_err = std::sqrt(sa_sig2 / (npee_max - npee_min + 1));
2154 br->sb_err = std::sqrt(sb_sig2 / (npee_max - npee_min + 1));
2155 br->dg_stddev_err = std::sqrt(stddev_sig2 / (npee_max - npee_min + 1));
2160 static double bar_err(int nbmin, int nbmax, const double* partsum)
2163 double svar, s, s2, dg;
2166 for (nb = nbmin; nb <= nbmax; nb++)
2170 for (b = 0; b < nb; b++)
2172 dg = partsum[nb * (nbmax + 1) + b];
2178 svar += (s2 - s * s) / (nb - 1);
2181 return std::sqrt(svar / (nbmax + 1 - nbmin));
2185 /* Seek the end of an identifier (consecutive non-spaces), followed by
2186 an optional number of spaces or '='-signs. Returns a pointer to the
2187 first non-space value found after that. Returns NULL if the string
2190 static const char* find_value(const char* str)
2192 gmx_bool name_end_found = FALSE;
2194 /* if the string is a NULL pointer, return a NULL pointer. */
2199 while (*str != '\0')
2201 /* first find the end of the name */
2202 if (!name_end_found)
2204 if (std::isspace(*str) || (*str == '='))
2206 name_end_found = TRUE;
2211 if (!(std::isspace(*str) || (*str == '=')))
2222 /* read a vector-notation description of a lambda vector */
2223 static gmx_bool read_lambda_compvec(const char* str,
2225 const lambda_components_t* lc_in,
2226 lambda_components_t* lc_out,
2230 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2231 components, or to check them */
2232 gmx_bool start_reached = FALSE; /* whether the start of component names
2234 gmx_bool vector = FALSE; /* whether there are multiple components */
2235 int n = 0; /* current component number */
2236 const char* val_start = nullptr; /* start of the component name, or NULL
2237 if not in a value */
2247 if (lc_out && lc_out->N == 0)
2249 initialize_lc = TRUE;
2252 if (lc_in == nullptr)
2261 if (std::isalnum(*str))
2264 start_reached = TRUE;
2267 else if (*str == '(')
2270 start_reached = TRUE;
2272 else if (!std::isspace(*str))
2274 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2281 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2288 lambda_components_add(lc_out, val_start, (str - val_start));
2292 if (!lambda_components_check(lc_out, n, val_start, (str - val_start)))
2300 /* add a vector component to lv */
2301 lv->val[n] = strtod(val_start, &strtod_end);
2302 if (val_start == strtod_end)
2304 gmx_fatal(FARGS, "Error reading lambda vector in %s", fn);
2307 /* reset for the next identifier */
2308 val_start = nullptr;
2316 else if (std::isalnum(*str))
2329 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2333 GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2338 else if (lv == nullptr)
2344 gmx_fatal(FARGS, "Incomplete lambda vector data in %s", fn);
2362 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2368 /* read and check the component names from a string */
2369 static gmx_bool read_lambda_components(const char* str, lambda_components_t* lc, const char** end, const char* fn)
2371 return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2374 /* read an initialized lambda vector from a string */
2375 static gmx_bool read_lambda_vector(const char* str, lambda_vec_t* lv, const char** end, const char* fn)
2377 return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2381 /* deduce lambda value from legend.
2383 legend = the legend string
2385 lam = the initialized lambda vector
2386 returns whether to use the data in this set.
2388 static gmx_bool legend2lambda(const char* fn, const char* legend, lambda_vec_t* lam)
2390 const char *ptr = nullptr, *ptr2 = nullptr;
2391 gmx_bool ok = FALSE;
2392 gmx_bool bdhdl = FALSE;
2393 const char* tostr = " to ";
2395 if (legend == nullptr)
2397 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2400 /* look for the last 'to': */
2404 ptr2 = std::strstr(ptr2, tostr);
2405 if (ptr2 != nullptr)
2410 } while (ptr2 != nullptr && *ptr2 != '\0');
2414 ptr += std::strlen(tostr) - 1; /* and advance past that 'to' */
2418 /* look for the = sign */
2419 ptr = std::strrchr(legend, '=');
2422 /* otherwise look for the last space */
2423 ptr = std::strrchr(legend, ' ');
2427 if (std::strstr(legend, "dH"))
2432 else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2437 else /*if (std::strstr(legend, "pV"))*/
2448 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2452 ptr = find_value(ptr);
2453 if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2455 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2463 ptr = std::strrchr(legend, '=');
2467 /* there must be a component name */
2471 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2473 /* now backtrack to the start of the identifier */
2474 while (isspace(*ptr))
2480 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2483 while (!std::isspace(*ptr))
2488 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2492 dhdl_index = lambda_components_find(lam->lc, ptr, (end - ptr));
2496 std::strncpy(buf, ptr, (end - ptr));
2497 buf[(end - ptr)] = '\0';
2498 gmx_fatal(FARGS, "Did not find lambda component for '%s' in %s", buf, fn);
2505 gmx_fatal(FARGS, "dhdl without component name with >1 lambda component in %s", fn);
2509 lam->dhdl = dhdl_index;
2514 static gmx_bool subtitle2lambda(const char* subtitle, xvg_t* ba, const char* fn, lambda_components_t* lc)
2519 double native_lambda;
2523 /* first check for a state string */
2524 ptr = std::strstr(subtitle, "state");
2528 const char* val_end;
2530 /* the new 4.6 style lambda vectors */
2531 ptr = find_value(ptr);
2534 index = std::strtol(ptr, &end, 10);
2537 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2544 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2547 /* now find the lambda vector component names */
2548 while (*ptr != '(' && !std::isalnum(*ptr))
2553 gmx_fatal(FARGS, "Incomplete lambda vector component data in %s", fn);
2558 if (!read_lambda_components(ptr, lc, &val_end, fn))
2560 gmx_fatal(FARGS, "lambda vector components in %s don't match those previously read", fn);
2562 ptr = find_value(val_end);
2565 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2568 lambda_vec_init(&(ba->native_lambda), lc);
2569 if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2571 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2573 ba->native_lambda.index = index;
2578 /* compatibility mode: check for lambda in other ways. */
2579 /* plain text lambda string */
2580 ptr = std::strstr(subtitle, "lambda");
2583 /* xmgrace formatted lambda string */
2584 ptr = std::strstr(subtitle, "\\xl\\f{}");
2588 /* xmgr formatted lambda string */
2589 ptr = std::strstr(subtitle, "\\8l\\4");
2593 ptr = std::strstr(ptr, "=");
2597 bFound = (sscanf(ptr + 1, "%lf", &(native_lambda)) == 1);
2598 /* add the lambda component name as an empty string */
2601 if (!lambda_components_check(lc, 0, "", 0))
2604 "lambda vector components in %s don't match those previously read",
2610 lambda_components_add(lc, "", 0);
2612 lambda_vec_init(&(ba->native_lambda), lc);
2613 ba->native_lambda.val[0] = native_lambda;
2620 static void read_bar_xvg_lowlevel(const char* fn, const real* temp, xvg_t* ba, lambda_components_t* lc)
2623 char * subtitle, **legend, *ptr;
2625 gmx_bool native_lambda_read = FALSE;
2632 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2635 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2637 /* Reorder the data */
2639 for (i = 1; i < ba->nset; i++)
2641 ba->y[i - 1] = ba->y[i];
2645 snew(ba->np, ba->nset);
2646 for (i = 0; i < ba->nset; i++)
2652 if (subtitle != nullptr)
2654 /* try to extract temperature */
2655 ptr = std::strstr(subtitle, "T =");
2659 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2663 gmx_fatal(FARGS, "Found temperature of %f in file '%s'", ba->temp, fn);
2673 "Did not find a temperature in the subtitle in file '%s', use the -temp "
2674 "option of [TT]gmx bar[tt]",
2680 /* Try to deduce lambda from the subtitle */
2683 if (subtitle2lambda(subtitle, ba, fn, lc))
2685 native_lambda_read = TRUE;
2688 snew(ba->lambda, ba->nset);
2689 if (legend == nullptr)
2691 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2694 ba->lambda[0] = ba->native_lambda;
2699 "File %s contains multiple sets but no legends, can not determine the lambda "
2706 for (i = 0; i < ba->nset;)
2708 /* Read lambda from the legend */
2709 lambda_vec_init(&(ba->lambda[i]), lc);
2710 lambda_vec_copy(&(ba->lambda[i]), &(ba->native_lambda));
2711 gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2714 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2720 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2721 for (j = i + 1; j < ba->nset; j++)
2723 ba->y[j - 1] = ba->y[j];
2724 legend[j - 1] = legend[j];
2731 if (!native_lambda_read)
2733 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2736 if (legend != nullptr)
2738 for (i = 0; i < ba->nset - 1; i++)
2746 static void read_bar_xvg(const char* fn, real* temp, sim_data_t* sd)
2754 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2756 if (barsim->nset < 1)
2758 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2761 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0))
2763 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2765 *temp = barsim->temp;
2767 /* now create a series of samples_t */
2768 snew(s, barsim->nset);
2769 for (i = 0; i < barsim->nset; i++)
2772 &(barsim->native_lambda),
2773 &(barsim->lambda[i]),
2775 lambda_vec_same(&(barsim->native_lambda), &(barsim->lambda[i])),
2777 s[i].du = barsim->y[i];
2778 s[i].ndu = barsim->np[i];
2781 lambda_data_list_insert_sample(sd->lb, s + i);
2786 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2787 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2790 s[0].t[s[0].ndu - 1],
2792 for (i = 0; i < barsim->nset; i++)
2794 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2795 printf(" %s (%d pts)\n", buf, s[i].ndu);
2801 static void read_edr_rawdh_block(samples_t** smp,
2806 lambda_vec_t* native_lambda,
2809 const char* filename)
2812 lambda_vec_t* foreign_lambda;
2814 samples_t* s; /* convenience pointer */
2817 /* check the block types etc. */
2818 if ((blk->nsub < 3) || (blk->sub[0].type != xdr_datatype_int) || (blk->sub[1].type != xdr_datatype_double)
2819 || ((blk->sub[2].type != xdr_datatype_float) && (blk->sub[2].type != xdr_datatype_double))
2820 || (blk->sub[0].nr < 1) || (blk->sub[1].nr < 1))
2822 gmx_fatal(FARGS, "Unexpected/corrupted block data in file %s around time %f.", filename, start_time);
2825 snew(foreign_lambda, 1);
2826 lambda_vec_init(foreign_lambda, native_lambda->lc);
2827 lambda_vec_copy(foreign_lambda, native_lambda);
2828 type = blk->sub[0].ival[0];
2831 for (i = 0; i < native_lambda->lc->N; i++)
2833 foreign_lambda->val[i] = blk->sub[1].dval[i];
2838 if (blk->sub[0].nr > 1)
2840 foreign_lambda->dhdl = blk->sub[0].ival[1];
2844 foreign_lambda->dhdl = 0;
2850 /* initialize the samples structure if it's empty. */
2852 samples_init(*smp, native_lambda, foreign_lambda, temp, type == dhbtDHDL, filename);
2853 (*smp)->start_time = start_time;
2854 (*smp)->delta_time = delta_time;
2857 /* set convenience pointer */
2860 /* now double check */
2861 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda))
2863 char buf[STRLEN], buf2[STRLEN];
2864 lambda_vec_print(foreign_lambda, buf, FALSE);
2865 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2866 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2867 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.", filename, start_time);
2870 /* make room for the data */
2871 if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2873 s->ndu_alloc += (s->ndu_alloc < static_cast<size_t>(blk->sub[2].nr)) ? blk->sub[2].nr * 2
2875 srenew(s->du_alloc, s->ndu_alloc);
2876 s->du = s->du_alloc;
2879 s->ndu += blk->sub[2].nr;
2880 s->ntot += blk->sub[2].nr;
2881 *ndu = blk->sub[2].nr;
2883 /* and copy the data*/
2884 for (j = 0; j < blk->sub[2].nr; j++)
2886 if (blk->sub[2].type == xdr_datatype_float)
2888 s->du[startj + j] = blk->sub[2].fval[j];
2892 s->du[startj + j] = blk->sub[2].dval[j];
2895 if (start_time + blk->sub[2].nr * delta_time > *last_t)
2897 *last_t = start_time + blk->sub[2].nr * delta_time;
2901 static samples_t* read_edr_hist_block(int* nsamples,
2905 lambda_vec_t* native_lambda,
2908 const char* filename)
2913 lambda_vec_t* foreign_lambda;
2917 /* check the block types etc. */
2918 if ((blk->nsub < 2) || (blk->sub[0].type != xdr_datatype_double)
2919 || (blk->sub[1].type != xdr_datatype_int64) || (blk->sub[0].nr < 2) || (blk->sub[1].nr < 2))
2921 gmx_fatal(FARGS, "Unexpected/corrupted block data in file %s around time %f", filename, start_time);
2924 nhist = blk->nsub - 2;
2931 gmx_fatal(FARGS, "Unexpected/corrupted block data in file %s around time %f", filename, start_time);
2937 snew(foreign_lambda, 1);
2938 lambda_vec_init(foreign_lambda, native_lambda->lc);
2939 lambda_vec_copy(foreign_lambda, native_lambda);
2940 type = static_cast<int>(blk->sub[1].lval[1]);
2943 double old_foreign_lambda;
2945 old_foreign_lambda = blk->sub[0].dval[0];
2946 if (old_foreign_lambda >= 0)
2948 foreign_lambda->val[0] = old_foreign_lambda;
2949 if (foreign_lambda->lc->N > 1)
2951 gmx_fatal(FARGS, "Single-component lambda in multi-component file %s", filename);
2956 for (i = 0; i < native_lambda->lc->N; i++)
2958 foreign_lambda->val[i] = blk->sub[0].dval[i + 2];
2964 if (foreign_lambda->lc->N > 1)
2966 if (blk->sub[1].nr < 3 + nhist)
2968 gmx_fatal(FARGS, "Missing derivative coord in multi-component file %s", filename);
2970 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
2974 foreign_lambda->dhdl = 0;
2978 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL, filename);
2981 for (i = 0; i < nhist; i++)
2983 nbins[i] = blk->sub[i + 2].nr;
2986 hist_init(s->hist, nhist, nbins);
2988 for (i = 0; i < nhist; i++)
2990 s->hist->x0[i] = blk->sub[1].lval[2 + i];
2991 s->hist->dx[i] = blk->sub[0].dval[1];
2994 s->hist->dx[i] = -s->hist->dx[i];
2998 s->hist->start_time = start_time;
2999 s->hist->delta_time = delta_time;
3000 s->start_time = start_time;
3001 s->delta_time = delta_time;
3003 for (i = 0; i < nhist; i++)
3007 for (j = 0; j < s->hist->nbin[i]; j++)
3009 int binv = static_cast<int>(blk->sub[i + 2].ival[j]);
3011 s->hist->bin[i][j] = binv;
3023 gmx_fatal(FARGS, "Histogram counts don't match in %s", filename);
3028 if (start_time + s->hist->sum * delta_time > *last_t)
3030 *last_t = start_time + s->hist->sum * delta_time;
3036 static void read_barsim_edr(const char* fn, real* temp, sim_data_t* sd)
3042 gmx_enxnm_t* enm = nullptr;
3043 double first_t = -1;
3045 samples_t** samples_rawdh = nullptr; /* contains samples for raw delta_h */
3046 int* nhists = nullptr; /* array to keep count & print at end */
3047 int* npts = nullptr; /* array to keep count & print at end */
3048 lambda_vec_t** lambdas = nullptr; /* array to keep count & print at end */
3049 lambda_vec_t* native_lambda;
3051 lambda_vec_t start_lambda;
3053 fp = open_enx(fn, "r");
3054 do_enxnms(fp, &nre, &enm);
3057 snew(native_lambda, 1);
3058 start_lambda.lc = nullptr;
3059 start_lambda.val = nullptr;
3061 while (do_enx(fp, fr))
3063 /* count the data blocks */
3064 int nblocks_raw = 0;
3065 int nblocks_hist = 0;
3068 /* DHCOLL block information: */
3069 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3072 /* count the blocks and handle collection information: */
3073 for (i = 0; i < fr->nblock; i++)
3075 if (fr->block[i].id == enxDHHIST)
3079 if (fr->block[i].id == enxDH)
3083 if (fr->block[i].id == enxDHCOLL)
3086 if ((fr->block[i].nsub < 1) || (fr->block[i].sub[0].type != xdr_datatype_double)
3087 || (fr->block[i].sub[0].nr < 5))
3089 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3092 /* read the data from the DHCOLL block */
3093 rtemp = fr->block[i].sub[0].dval[0];
3094 start_time = fr->block[i].sub[0].dval[1];
3095 delta_time = fr->block[i].sub[0].dval[2];
3096 old_start_lambda = fr->block[i].sub[0].dval[3];
3097 delta_lambda = fr->block[i].sub[0].dval[4];
3099 if (delta_lambda != 0)
3101 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3103 if ((*temp != rtemp) && (*temp > 0))
3106 "Temperature in file %s different from earlier files or setting\n",
3111 if (old_start_lambda >= 0)
3115 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3118 "lambda vector components in %s don't match those previously "
3125 lambda_components_add(&(sd->lc), "", 0);
3127 if (!start_lambda.lc)
3129 lambda_vec_init(&start_lambda, &(sd->lc));
3131 start_lambda.val[0] = old_start_lambda;
3135 /* read lambda vector */
3137 gmx_bool check = (sd->lc.N > 0);
3138 if (fr->block[i].nsub < 2)
3140 gmx_fatal(FARGS, "No lambda vector, but start_lambda=%f\n", old_start_lambda);
3142 n_lambda_vec = fr->block[i].sub[1].ival[1];
3143 for (j = 0; j < n_lambda_vec; j++)
3145 const char* name = efpt_singular_names[fr->block[i].sub[1].ival[1 + j]];
3148 /* check the components */
3149 lambda_components_check(&(sd->lc), j, name, std::strlen(name));
3153 lambda_components_add(&(sd->lc), name, std::strlen(name));
3156 lambda_vec_init(&start_lambda, &(sd->lc));
3157 start_lambda.index = fr->block[i].sub[1].ival[0];
3158 for (j = 0; j < n_lambda_vec; j++)
3160 start_lambda.val[j] = fr->block[i].sub[0].dval[5 + j];
3165 first_t = start_time;
3172 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3174 if (nblocks_raw > 0 && nblocks_hist > 0)
3176 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3181 /* this is the first round; allocate the associated data
3183 /*native_lambda=start_lambda;*/
3184 lambda_vec_init(native_lambda, &(sd->lc));
3185 lambda_vec_copy(native_lambda, &start_lambda);
3186 nsamples = nblocks_raw + nblocks_hist;
3187 snew(nhists, nsamples);
3188 snew(npts, nsamples);
3189 snew(lambdas, nsamples);
3190 snew(samples_rawdh, nsamples);
3191 for (i = 0; i < nsamples; i++)
3195 lambdas[i] = nullptr;
3196 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3197 ones contain values */
3202 // nsamples > 0 means this is NOT the first iteration
3204 /* check the native lambda */
3205 if (!lambda_vec_same(&start_lambda, native_lambda))
3208 "Native lambda not constant in file %s: started at %f, and becomes %f at "
3211 native_lambda->val[0],
3212 start_lambda.val[0],
3215 /* check the number of samples against the previous number */
3216 if (((nblocks_raw + nblocks_hist) != nsamples) || (nlam != 1))
3219 "Unexpected block count in %s: was %d, now %d\n",
3222 nblocks_raw + nblocks_hist + nlam);
3224 /* check whether last iterations's end time matches with
3225 the currrent start time */
3226 if ((std::abs(last_t - start_time) > 2 * delta_time) && last_t >= 0)
3228 /* it didn't. We need to store our samples and reallocate */
3229 for (i = 0; i < nsamples; i++)
3231 if (samples_rawdh[i])
3233 /* insert it into the existing list */
3234 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3235 /* and make sure we'll allocate a new one this time
3237 samples_rawdh[i] = nullptr;
3244 k = 0; /* counter for the lambdas, etc. arrays */
3245 for (i = 0; i < fr->nblock; i++)
3247 if (fr->block[i].id == enxDH)
3249 int type = (fr->block[i].sub[0].ival[0]);
3250 if (type == dhbtDH || type == dhbtDHDL)
3253 read_edr_rawdh_block(&(samples_rawdh[k]),
3263 if (samples_rawdh[k])
3265 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3270 else if (fr->block[i].id == enxDHHIST)
3272 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3273 if (type == dhbtDH || type == dhbtDHDL)
3277 samples_t* s; /* this is where the data will go */
3278 s = read_edr_hist_block(
3279 &nb, &(fr->block[i]), start_time, delta_time, native_lambda, rtemp, &last_t, fn);
3283 lambdas[k] = s->foreign_lambda;
3286 /* and insert the new sample immediately */
3287 for (j = 0; j < nb; j++)
3289 lambda_data_list_insert_sample(sd->lb, s + j);
3295 /* Now store all our extant sample collections */
3296 for (i = 0; i < nsamples; i++)
3298 if (samples_rawdh[i])
3300 /* insert it into the existing list */
3301 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3309 lambda_vec_print(native_lambda, buf, FALSE);
3310 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n", fn, first_t, last_t, buf);
3311 for (i = 0; i < nsamples; i++)
3315 lambda_vec_print(lambdas[i], buf, TRUE);
3318 printf(" %s (%d hists)\n", buf, nhists[i]);
3322 printf(" %s (%d pts)\n", buf, npts[i]);
3334 int gmx_bar(int argc, char* argv[])
3336 static const char* desc[] = {
3337 "[THISMODULE] calculates free energy difference estimates through ",
3338 "Bennett's acceptance ratio method (BAR). It also automatically",
3339 "adds series of individual free energies obtained with BAR into",
3340 "a combined free energy estimate.[PAR]",
3342 "Every individual BAR free energy difference relies on two ",
3343 "simulations at different states: say state A and state B, as",
3344 "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3345 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3346 "average of the Hamiltonian difference of state B given state A and",
3348 "The energy differences to the other state must be calculated",
3349 "explicitly during the simulation. This can be done with",
3350 "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3352 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3353 "Two types of input files are supported:",
3355 " * Files with more than one [IT]y[it]-value. ",
3356 " The files should have columns ",
3357 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3358 " The [GRK]lambda[grk] values are inferred ",
3359 " from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3360 " dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3361 " legends of Delta H",
3362 " * Files with only one [IT]y[it]-value. Using the",
3363 " [TT]-extp[tt] option for these files, it is assumed",
3364 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3365 " Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3366 " The [GRK]lambda[grk] value of the simulation is inferred from the ",
3367 " subtitle (if present), otherwise from a number in the subdirectory ",
3368 " in the file name.",
3371 "The [GRK]lambda[grk] of the simulation is parsed from ",
3372 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3373 "foreign [GRK]lambda[grk] values from the legend containing the ",
3374 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3375 "the legend line containing 'T ='.[PAR]",
3377 "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3378 "These can contain either lists of energy differences (see the ",
3379 "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3380 "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3381 "[TT]dh_hist_spacing[tt]).",
3382 "The temperature and [GRK]lambda[grk] ",
3383 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3385 "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3386 "the energy difference can also be extrapolated from the ",
3387 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3388 "option, which assumes that the system's Hamiltonian depends linearly",
3389 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3391 "The free energy estimates are determined using BAR with bisection, ",
3392 "with the precision of the output set with [TT]-prec[tt]. ",
3393 "An error estimate taking into account time correlations ",
3394 "is made by splitting the data into blocks and determining ",
3395 "the free energy differences over those blocks and assuming ",
3396 "the blocks are independent. ",
3397 "The final error estimate is determined from the average variance ",
3398 "over 5 blocks. A range of block numbers for error estimation can ",
3399 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3401 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3402 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3403 "samples. [BB]Note[bb] that when aggregating energy ",
3404 "differences/derivatives with different sampling intervals, this is ",
3405 "almost certainly not correct. Usually subsequent energies are ",
3406 "correlated and different time intervals mean different degrees ",
3407 "of correlation between samples.[PAR]",
3409 "The results are split in two parts: the last part contains the final ",
3410 "results in kJ/mol, together with the error estimate for each part ",
3411 "and the total. The first part contains detailed free energy ",
3412 "difference estimates and phase space overlap measures in units of ",
3413 "kT (together with their computed error estimate). The printed ",
3416 " * lam_A: the [GRK]lambda[grk] values for point A.",
3417 " * lam_B: the [GRK]lambda[grk] values for point B.",
3418 " * DG: the free energy estimate.",
3419 " * s_A: an estimate of the relative entropy of B in A.",
3420 " * s_B: an estimate of the relative entropy of A in B.",
3421 " * stdev: an estimate expected per-sample standard deviation.",
3424 "The relative entropy of both states in each other's ensemble can be ",
3425 "interpreted as a measure of phase space overlap: ",
3426 "the relative entropy s_A of the work samples of lambda_B in the ",
3427 "ensemble of lambda_A (and vice versa for s_B), is a ",
3428 "measure of the 'distance' between Boltzmann distributions of ",
3429 "the two states, that goes to zero for identical distributions. See ",
3430 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3432 "The estimate of the expected per-sample standard deviation, as given ",
3433 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3434 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3435 "of the actual statistical error, because it assumes independent samples).[PAR]",
3437 "To get a visual estimate of the phase space overlap, use the ",
3438 "[TT]-oh[tt] option to write series of histograms, together with the ",
3439 "[TT]-nbin[tt] option.[PAR]"
3441 static real begin = 0, end = -1, temp = -1;
3442 int nd = 2, nbmin = 5, nbmax = 5;
3444 gmx_bool use_dhdl = FALSE;
3446 { "-b", FALSE, etREAL, { &begin }, "Begin time for BAR" },
3447 { "-e", FALSE, etREAL, { &end }, "End time for BAR" },
3448 { "-temp", FALSE, etREAL, { &temp }, "Temperature (K)" },
3449 { "-prec", FALSE, etINT, { &nd }, "The number of digits after the decimal point" },
3450 { "-nbmin", FALSE, etINT, { &nbmin }, "Minimum number of blocks for error estimation" },
3451 { "-nbmax", FALSE, etINT, { &nbmax }, "Maximum number of blocks for error estimation" },
3452 { "-nbin", FALSE, etINT, { &nbin }, "Number of bins for histogram output" },
3457 "Whether to linearly extrapolate dH/dl values to use as energies" }
3460 t_filenm fnm[] = { { efXVG, "-f", "dhdl", ffOPTRDMULT },
3461 { efEDR, "-g", "ener", ffOPTRDMULT },
3462 { efXVG, "-o", "bar", ffOPTWR },
3463 { efXVG, "-oi", "barint", ffOPTWR },
3464 { efXVG, "-oh", "histogram", ffOPTWR } };
3465 #define NFILE asize(fnm)
3468 int nf = 0; /* file counter */
3469 int nfile_tot; /* total number of input files */
3470 sim_data_t sim_data; /* the simulation data */
3471 barres_t* results; /* the results */
3472 int nresults; /* number of results in results array */
3475 double prec, dg_tot;
3477 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3478 char buf[STRLEN], buf2[STRLEN];
3479 char ktformat[STRLEN], sktformat[STRLEN];
3480 char kteformat[STRLEN], skteformat[STRLEN];
3481 gmx_output_env_t* oenv;
3483 gmx_bool result_OK = TRUE, bEE = TRUE;
3485 gmx_bool disc_err = FALSE;
3486 double sum_disc_err = 0.; /* discretization error */
3487 gmx_bool histrange_err = FALSE;
3488 double sum_histrange_err = 0.; /* histogram range error */
3489 double stat_err = 0.; /* statistical error */
3491 if (!parse_common_args(
3492 &argc, argv, PCA_CAN_VIEW, NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3497 gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3498 gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3500 sim_data_init(&sim_data);
3502 /* make linked list */
3504 lambda_data_init(lb, 0, 0);
3510 nfile_tot = xvgFiles.size() + edrFiles.size();
3514 gmx_fatal(FARGS, "No input files!");
3519 gmx_fatal(FARGS, "Can not have negative number of digits");
3521 prec = std::pow(10.0, static_cast<double>(-nd));
3523 snew(partsum, (nbmax + 1) * (nbmax + 1));
3526 /* read in all files. First xvg files */
3527 for (const std::string& filenm : xvgFiles)
3529 read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3532 /* then .edr files */
3533 for (const std::string& filenm : edrFiles)
3535 read_barsim_edr(filenm.c_str(), &temp, &sim_data);
3540 /* fix the times to allow for equilibration */
3541 sim_data_impose_times(&sim_data, begin, end);
3543 if (opt2bSet("-oh", NFILE, fnm))
3545 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3548 /* assemble the output structures from the lambdas */
3549 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3551 sum_disc_err = barres_list_max_disc_err(results, nresults);
3555 printf("\nNo results to calculate.\n");
3559 if (sum_disc_err > prec)
3561 prec = sum_disc_err;
3562 nd = static_cast<int>(std::ceil(-std::log10(prec)));
3563 printf("WARNING: setting the precision to %g because that is the minimum\n "
3564 "reasonable number, given the expected discretization error.\n",
3569 /*sprintf(lamformat,"%%6.3f");*/
3570 sprintf(dgformat, "%%%d.%df", 3 + nd, nd);
3571 /* the format strings of the results in kT */
3572 sprintf(ktformat, "%%%d.%df", 5 + nd, nd);
3573 sprintf(sktformat, "%%%ds", 6 + nd);
3574 /* the format strings of the errors in kT */
3575 sprintf(kteformat, "%%%d.%df", 3 + nd, nd);
3576 sprintf(skteformat, "%%%ds", 4 + nd);
3577 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3578 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3582 if (opt2bSet("-o", NFILE, fnm))
3584 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3585 fpb = xvgropen_type(
3586 opt2fn("-o", NFILE, fnm), "Free energy differences", "\\lambda", buf, exvggtXYDY, oenv);
3590 if (opt2bSet("-oi", NFILE, fnm))
3592 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3593 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral", "\\lambda", buf, oenv);
3602 /* first calculate results */
3605 for (f = 0; f < nresults; f++)
3607 /* Determine the free energy difference with a factor of 10
3608 * more accuracy than requested for printing.
3610 calc_bar(&(results[f]), 0.1 * prec, nbmin, nbmax, &bEE, partsum);
3612 if (results[f].dg_disc_err > prec / 10.)
3616 if (results[f].dg_histrange_err > prec / 10.)
3618 histrange_err = TRUE;
3622 /* print results in kT */
3625 printf("\nTemperature: %g K\n", temp);
3627 printf("\nDetailed results in kT (see help for explanation):\n\n");
3628 printf("%6s ", " lam_A");
3629 printf("%6s ", " lam_B");
3630 printf(sktformat, "DG ");
3633 printf(skteformat, "+/- ");
3637 printf(skteformat, "disc ");
3641 printf(skteformat, "range ");
3643 printf(sktformat, "s_A ");
3646 printf(skteformat, "+/- ");
3648 printf(sktformat, "s_B ");
3651 printf(skteformat, "+/- ");
3653 printf(sktformat, "stdev ");
3656 printf(skteformat, "+/- ");
3659 for (f = 0; f < nresults; f++)
3661 lambda_vec_print_short(results[f].a->native_lambda, buf);
3663 lambda_vec_print_short(results[f].b->native_lambda, buf);
3665 printf(ktformat, results[f].dg);
3669 printf(kteformat, results[f].dg_err);
3674 printf(kteformat, results[f].dg_disc_err);
3679 printf(kteformat, results[f].dg_histrange_err);
3682 printf(ktformat, results[f].sa);
3686 printf(kteformat, results[f].sa_err);
3689 printf(ktformat, results[f].sb);
3693 printf(kteformat, results[f].sb_err);
3696 printf(ktformat, results[f].dg_stddev);
3700 printf(kteformat, results[f].dg_stddev_err);
3704 /* Check for negative relative entropy with a 95% certainty. */
3705 if (results[f].sa < -2 * results[f].sa_err || results[f].sb < -2 * results[f].sb_err)
3713 printf("\nWARNING: Some of these results violate the Second Law of "
3714 "Thermodynamics: \n"
3715 " This is can be the result of severe undersampling, or "
3717 " there is something wrong with the simulations.\n");
3721 /* final results in kJ/mol */
3722 printf("\n\nFinal results in kJ/mol:\n\n");
3724 for (f = 0; f < nresults; f++)
3729 lambda_vec_print_short(results[f].a->native_lambda, buf);
3730 fprintf(fpi, xvg2format, buf, dg_tot);
3736 lambda_vec_print_intermediate(results[f].a->native_lambda, results[f].b->native_lambda, buf);
3738 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3742 lambda_vec_print_short(results[f].a->native_lambda, buf);
3743 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3744 printf("%s - %s", buf, buf2);
3747 printf(dgformat, results[f].dg * kT);
3751 printf(dgformat, results[f].dg_err * kT);
3755 printf(" (max. range err. = ");
3756 printf(dgformat, results[f].dg_histrange_err * kT);
3758 sum_histrange_err += results[f].dg_histrange_err * kT;
3762 dg_tot += results[f].dg;
3766 lambda_vec_print_short(results[0].a->native_lambda, buf);
3767 lambda_vec_print_short(results[nresults - 1].b->native_lambda, buf2);
3768 printf("%s - %s", buf, buf2);
3771 printf(dgformat, dg_tot * kT);
3774 stat_err = bar_err(nbmin, nbmax, partsum) * kT;
3776 printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3781 printf("\nmaximum discretization error = ");
3782 printf(dgformat, sum_disc_err);
3783 if (bEE && stat_err < sum_disc_err)
3785 printf("WARNING: discretization error (%g) is larger than statistical error.\n "
3786 "Decrease histogram spacing for more accurate results\n",
3792 printf("\nmaximum histogram range error = ");
3793 printf(dgformat, sum_histrange_err);
3794 if (bEE && stat_err < sum_histrange_err)
3796 printf("WARNING: histogram range error (%g) is larger than statistical error.\n "
3797 "Increase histogram range for more accurate results\n",
3806 lambda_vec_print_short(results[nresults - 1].b->native_lambda, buf);
3807 fprintf(fpi, xvg2format, buf, dg_tot);
3815 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3816 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");