2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/enxio.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/math/units.h"
51 #include "gromacs/math/utilities.h"
52 #include "gromacs/mdlib/energyoutput.h"
53 #include "gromacs/mdtypes/md_enums.h"
54 #include "gromacs/trajectory/energyframe.h"
55 #include "gromacs/utility/arraysize.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/dir_separator.h"
58 #include "gromacs/utility/fatalerror.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/smalloc.h"
61 #include "gromacs/utility/snprintf.h"
64 /* Structure for the names of lambda vector components */
65 typedef struct lambda_components_t
67 char **names; /* Array of strings with names for the lambda vector
69 int N; /* The number of components */
70 int Nalloc; /* The number of allocated components */
71 } lambda_components_t;
73 /* Structure for a lambda vector or a dhdl derivative direction */
74 typedef struct lambda_vec_t
76 double *val; /* The lambda vector component values. Only valid if
78 int dhdl; /* The coordinate index for the derivative described by this
80 const lambda_components_t *lc; /* the associated lambda_components
82 int index; /* The state number (init-lambda-state) of this lambda
83 vector, if known. If not, it is set to -1 */
86 /* the dhdl.xvg data from a simulation */
90 int ftp; /* file type */
91 int nset; /* number of lambdas, including dhdl */
92 int *np; /* number of data points (du or hists) per lambda */
93 int np_alloc; /* number of points (du or hists) allocated */
94 double temp; /* temperature */
95 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
96 double *t; /* the times (of second index for y) */
97 double **y; /* the dU values. y[0] holds the derivative, while
98 further ones contain the energy differences between
99 the native lambda and the 'foreign' lambdas. */
100 lambda_vec_t native_lambda; /* the native lambda */
102 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
106 typedef struct hist_t
108 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
109 double dx[2]; /* the histogram spacing. The reverse
110 dx is the negative of the forward dx.*/
111 int64_t x0[2]; /* the (forward + reverse) histogram start
114 int nbin[2]; /* the (forward+reverse) number of bins */
115 int64_t sum; /* the total number of counts. Must be
116 the same for forward + reverse. */
117 int nhist; /* number of hist datas (forward or reverse) */
119 double start_time, delta_time; /* start time, end time of histogram */
123 /* an aggregate of samples for partial free energy calculation */
124 typedef struct samples_t
126 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
127 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
128 double temp; /* the temperature */
129 gmx_bool derivative; /* whether this sample is a derivative */
131 /* The samples come either as either delta U lists: */
132 int ndu; /* the number of delta U samples */
133 double *du; /* the delta u's */
134 double *t; /* the times associated with those samples, or: */
135 double start_time, delta_time; /*start time and delta time for linear time*/
137 /* or as histograms: */
138 hist_t *hist; /* a histogram */
140 /* allocation data: (not NULL for data 'owned' by this struct) */
141 double *du_alloc, *t_alloc; /* allocated delta u arrays */
142 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
143 hist_t *hist_alloc; /* allocated hist */
145 int64_t ntot; /* total number of samples */
146 const char *filename; /* the file name this sample comes from */
149 /* a sample range (start to end for du-style data, or boolean
150 for both du-style data and histograms */
151 typedef struct sample_range_t
153 int start, end; /* start and end index for du style data */
154 gmx_bool use; /* whether to use this sample */
156 samples_t *s; /* the samples this range belongs to */
160 /* a collection of samples for a partial free energy calculation
161 (i.e. the collection of samples from one native lambda to one
163 typedef struct sample_coll_t
165 lambda_vec_t *native_lambda; /* these should be the same for all samples
167 lambda_vec_t *foreign_lambda; /* collection */
168 double temp; /* the temperature */
170 int nsamples; /* the number of samples */
171 samples_t **s; /* the samples themselves */
172 sample_range_t *r; /* the sample ranges */
173 int nsamples_alloc; /* number of allocated samples */
175 int64_t ntot; /* total number of samples in the ranges of
178 struct sample_coll_t *next, *prev; /* next and previous in the list */
181 /* all the samples associated with a lambda point */
182 typedef struct lambda_data_t
184 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
185 double temp; /* temperature */
187 sample_coll_t *sc; /* the samples */
189 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
191 struct lambda_data_t *next, *prev; /* the next and prev in the list */
194 /* Top-level data structure of simulation data */
195 typedef struct sim_data_t
197 lambda_data_t *lb; /* a lambda data linked list */
198 lambda_data_t lb_head; /* The head element of the linked list */
200 lambda_components_t lc; /* the allowed components of the lambda
204 /* Top-level data structure with calculated values. */
206 sample_coll_t *a, *b; /* the simulation data */
208 double dg; /* the free energy difference */
209 double dg_err; /* the free energy difference */
211 double dg_disc_err; /* discretization error */
212 double dg_histrange_err; /* histogram range error */
214 double sa; /* relative entropy of b in state a */
215 double sa_err; /* error in sa */
216 double sb; /* relative entropy of a in state b */
217 double sb_err; /* error in sb */
219 double dg_stddev; /* expected dg stddev per sample */
220 double dg_stddev_err; /* error in dg_stddev */
224 /* Initialize a lambda_components structure */
225 static void lambda_components_init(lambda_components_t *lc)
229 snew(lc->names, lc->Nalloc);
232 /* Add a component to a lambda_components structure */
233 static void lambda_components_add(lambda_components_t *lc,
234 const char *name, size_t name_length)
236 while (lc->N + 1 > lc->Nalloc)
238 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
239 srenew(lc->names, lc->Nalloc);
241 snew(lc->names[lc->N], name_length+1);
242 std::strncpy(lc->names[lc->N], name, name_length);
246 /* check whether a component with index 'index' matches the given name, or
247 is also NULL. Returns TRUE if this is the case.
248 the string name does not need to end */
249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
255 if (!lc || index >= lc->N)
259 if ((name == nullptr) && (lc->names[index] == nullptr))
263 if (((name != nullptr) && (lc->names[index] == nullptr)) ||
264 ((name == nullptr) && (lc->names[index] != nullptr)))
268 len = std::strlen(lc->names[index]);
269 if (len != name_length)
273 return std::strncmp(lc->names[index], name, name_length) == 0;
276 /* Find the index of a given lambda component name, or -1 if not found */
277 static int lambda_components_find(const lambda_components_t *lc,
283 for (i = 0; i < lc->N; i++)
285 if (std::strncmp(lc->names[i], name, name_length) == 0)
295 /* initialize a lambda vector */
296 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
298 snew(lv->val, lc->N);
304 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
308 lambda_vec_init(lv, orig->lc);
309 lv->dhdl = orig->dhdl;
310 lv->index = orig->index;
311 for (i = 0; i < lv->lc->N; i++)
313 lv->val[i] = orig->val[i];
317 /* write a lambda vec to a preallocated string */
318 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
322 str[0] = 0; /* reset the string */
327 str += sprintf(str, "delta H to ");
331 str += sprintf(str, "(");
333 for (i = 0; i < lv->lc->N; i++)
335 str += sprintf(str, "%g", lv->val[i]);
338 str += sprintf(str, ", ");
348 /* this lambda vector describes a derivative */
349 str += sprintf(str, "dH/dl");
350 if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
352 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
357 /* write a shortened version of the lambda vec to a preallocated string */
358 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
362 sprintf(str, "%6d", lv->index);
368 sprintf(str, "%6.3f", lv->val[0]);
372 sprintf(str, "dH/dl[%d]", lv->dhdl);
377 /* write an intermediate version of two lambda vecs to a preallocated string */
378 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
379 const lambda_vec_t *b, char *str)
382 if ( (a->index >= 0) && (b->index >= 0) )
384 sprintf(str, "%6.3f", (a->index+b->index)/2.0);
388 if ( (a->dhdl < 0) && (b->dhdl < 0) )
390 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
396 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
397 a and b must describe non-derivative lambda points */
398 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
403 if ( (a->dhdl > 0) || (b->dhdl > 0) )
406 "Trying to calculate the difference between derivatives instead of lambda points");
411 "Trying to calculate the difference lambdas with differing basis set");
413 for (i = 0; i < a->lc->N; i++)
415 double df = a->val[i] - b->val[i];
418 return std::sqrt(ret);
422 /* check whether two lambda vectors are the same */
423 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
433 for (i = 0; i < a->lc->N; i++)
435 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
444 /* they're derivatives, so we check whether the indices match */
445 return (a->dhdl == b->dhdl);
449 /* Compare the sort order of two foreign lambda vectors
451 returns 1 if a is 'bigger' than b,
452 returns 0 if they're the same,
453 returns -1 if a is 'smaller' than b.*/
454 static int lambda_vec_cmp_foreign(const lambda_vec_t *a,
455 const lambda_vec_t *b)
458 double norm_a = 0, norm_b = 0;
459 gmx_bool different = FALSE;
463 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
465 /* if either one has an index we sort based on that */
466 if ((a->index >= 0) || (b->index >= 0))
468 if (a->index == b->index)
472 return (a->index > b->index) ? 1 : -1;
474 if (a->dhdl >= 0 || b->dhdl >= 0)
476 /* lambda vectors that are derivatives always sort higher than those
477 without derivatives */
478 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
480 return (a->dhdl >= 0) ? 1 : -1;
485 /* neither has an index, so we can only sort on the lambda components,
486 which is only valid if there is one component */
487 for (i = 0; i < a->lc->N; i++)
489 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
493 norm_a += a->val[i]*a->val[i];
494 norm_b += b->val[i]*b->val[i];
500 return (norm_a > norm_b) ? 1 : -1;
503 /* Compare the sort order of two native lambda vectors
505 returns 1 if a is 'bigger' than b,
506 returns 0 if they're the same,
507 returns -1 if a is 'smaller' than b.*/
508 static int lambda_vec_cmp_native(const lambda_vec_t *a,
509 const lambda_vec_t *b)
513 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
515 /* if either one has an index we sort based on that */
516 if ((a->index >= 0) || (b->index >= 0))
518 if (a->index == b->index)
522 return (a->index > b->index) ? 1 : -1;
524 /* neither has an index, so we can only sort on the lambda components,
525 which is only valid if there is one component */
529 "Can't compare lambdas with no index and > 1 component");
531 if (a->dhdl >= 0 || b->dhdl >= 0)
534 "Can't compare native lambdas that are derivatives");
536 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
540 return a->val[0] > b->val[0] ? 1 : -1;
546 static void hist_init(hist_t *h, int nhist, int *nbin)
551 gmx_fatal(FARGS, "histogram with more than two sets of data!");
553 for (i = 0; i < nhist; i++)
555 snew(h->bin[i], nbin[i]);
557 h->nbin[i] = nbin[i];
558 h->start_time = h->delta_time = 0;
565 static void xvg_init(xvg_t *ba)
567 ba->filename = nullptr;
574 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
575 lambda_vec_t *foreign_lambda, double temp,
576 gmx_bool derivative, const char *filename)
578 s->native_lambda = native_lambda;
579 s->foreign_lambda = foreign_lambda;
581 s->derivative = derivative;
586 s->start_time = s->delta_time = 0;
588 s->du_alloc = nullptr;
589 s->t_alloc = nullptr;
590 s->hist_alloc = nullptr;
595 s->filename = filename;
598 static void sample_range_init(sample_range_t *r, samples_t *s)
606 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
607 lambda_vec_t *foreign_lambda, double temp)
609 sc->native_lambda = native_lambda;
610 sc->foreign_lambda = foreign_lambda;
616 sc->nsamples_alloc = 0;
619 sc->next = sc->prev = nullptr;
622 static void sample_coll_destroy(sample_coll_t *sc)
624 /* don't free the samples themselves */
630 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
633 l->lambda = native_lambda;
639 l->sc = &(l->sc_head);
641 sample_coll_init(l->sc, native_lambda, nullptr, 0.);
646 static void barres_init(barres_t *br)
655 br->dg_stddev_err = 0;
662 /* calculate the total number of samples in a sample collection */
663 static void sample_coll_calc_ntot(sample_coll_t *sc)
668 for (i = 0; i < sc->nsamples; i++)
674 sc->ntot += sc->s[i]->ntot;
678 sc->ntot += sc->r[i].end - sc->r[i].start;
685 /* find the barsamples_t associated with a lambda that corresponds to
686 a specific foreign lambda */
687 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
688 lambda_vec_t *foreign_lambda)
690 sample_coll_t *sc = l->sc->next;
694 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
704 /* insert li into an ordered list of lambda_colls */
705 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
707 sample_coll_t *scn = l->sc->next;
708 while ( (scn != l->sc) )
710 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
716 /* now insert it before the found scn */
718 sc->prev = scn->prev;
719 scn->prev->next = sc;
723 /* insert li into an ordered list of lambdas */
724 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
726 lambda_data_t *lc = head->next;
729 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
735 /* now insert ourselves before the found lc */
742 /* insert a sample and a sample_range into a sample_coll. The
743 samples are stored as a pointer, the range is copied. */
744 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
747 /* first check if it belongs here */
748 GMX_ASSERT(sc->next->s, "Next not properly initialized!");
749 if (sc->temp != s->temp)
751 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
752 s->filename, sc->next->s[0]->filename);
754 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
756 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
757 s->filename, sc->next->s[0]->filename);
759 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
761 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
762 s->filename, sc->next->s[0]->filename);
765 /* check if there's room */
766 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
768 sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
769 srenew(sc->s, sc->nsamples_alloc);
770 srenew(sc->r, sc->nsamples_alloc);
772 sc->s[sc->nsamples] = s;
773 sc->r[sc->nsamples] = *r;
776 sample_coll_calc_ntot(sc);
779 /* insert a sample into a lambda_list, creating the right sample_coll if
781 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
783 gmx_bool found = FALSE;
787 lambda_data_t *l = head->next;
789 /* first search for the right lambda_data_t */
792 if (lambda_vec_same(l->lambda, s->native_lambda) )
802 snew(l, 1); /* allocate a new one */
803 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
804 lambda_data_insert_lambda(head, l); /* add it to the list */
807 /* now look for a sample collection */
808 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
811 snew(sc, 1); /* allocate a new one */
812 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
813 lambda_data_insert_sample_coll(l, sc);
816 /* now insert the samples into the sample coll */
817 sample_range_init(&r, s);
818 sample_coll_insert_sample(sc, s, &r);
822 /* make a histogram out of a sample collection */
823 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
824 int *nbin_alloc, int *nbin,
825 double *dx, double *xmin, int nbin_default)
828 gmx_bool dx_set = FALSE;
829 gmx_bool xmin_set = FALSE;
831 gmx_bool xmax_set = FALSE;
832 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
833 limits of a histogram */
836 /* first determine dx and xmin; try the histograms */
837 for (i = 0; i < sc->nsamples; i++)
841 hist_t *hist = sc->s[i]->hist;
842 for (k = 0; k < hist->nhist; k++)
844 double hdx = hist->dx[k];
845 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
847 /* we use the biggest dx*/
848 if ( (!dx_set) || hist->dx[0] > *dx)
853 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
856 *xmin = (hist->x0[k]*hdx);
859 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
863 if (hist->bin[k][hist->nbin[k]-1] != 0)
865 xmax_set_hard = TRUE;
868 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
870 xmax_set_hard = TRUE;
876 /* and the delta us */
877 for (i = 0; i < sc->nsamples; i++)
879 if (sc->s[i]->ndu > 0)
881 /* determine min and max */
882 int starti = sc->r[i].start;
883 int endi = sc->r[i].end;
884 double du_xmin = sc->s[i]->du[starti];
885 double du_xmax = sc->s[i]->du[starti];
886 for (j = starti+1; j < endi; j++)
888 if (sc->s[i]->du[j] < du_xmin)
890 du_xmin = sc->s[i]->du[j];
892 if (sc->s[i]->du[j] > du_xmax)
894 du_xmax = sc->s[i]->du[j];
898 /* and now change the limits */
899 if ( (!xmin_set) || (du_xmin < *xmin) )
904 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
912 if (!xmax_set || !xmin_set)
921 *nbin = nbin_default;
922 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
923 be 0, and we count from 0 */
927 *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
930 if (*nbin > *nbin_alloc)
933 srenew(*bin, *nbin_alloc);
936 /* reset the histogram */
937 for (i = 0; i < (*nbin); i++)
942 /* now add the actual data */
943 for (i = 0; i < sc->nsamples; i++)
947 hist_t *hist = sc->s[i]->hist;
948 for (k = 0; k < hist->nhist; k++)
950 double hdx = hist->dx[k];
951 double xmin_hist = hist->x0[k]*hdx;
952 for (j = 0; j < hist->nbin[k]; j++)
954 /* calculate the bin corresponding to the middle of the
956 double x = hdx*(j+0.5) + xmin_hist;
957 int binnr = static_cast<int>((x-(*xmin))/(*dx));
959 if (binnr >= *nbin || binnr < 0)
964 (*bin)[binnr] += hist->bin[k][j];
970 int starti = sc->r[i].start;
971 int endi = sc->r[i].end;
972 for (j = starti; j < endi; j++)
974 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
975 if (binnr >= *nbin || binnr < 0)
986 /* write a collection of histograms to a file */
987 static void sim_data_histogram(sim_data_t *sd, const char *filename,
988 int nbin_default, const gmx_output_env_t *oenv)
990 char label_x[STRLEN];
991 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
992 const char *title = "N(\\DeltaH)";
993 const char *label_y = "Samples";
997 char **setnames = nullptr;
998 gmx_bool first_set = FALSE;
999 /* histogram data: */
1000 int *hist = nullptr;
1006 lambda_data_t *bl_head = sd->lb;
1008 printf("\nWriting histogram to %s\n", filename);
1009 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1011 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1013 /* first get all the set names */
1015 /* iterate over all lambdas */
1016 while (bl != bl_head)
1018 sample_coll_t *sc = bl->sc->next;
1020 /* iterate over all samples */
1021 while (sc != bl->sc)
1023 char buf[STRLEN], buf2[STRLEN];
1026 srenew(setnames, nsets);
1027 snew(setnames[nsets-1], STRLEN);
1028 if (sc->foreign_lambda->dhdl < 0)
1030 lambda_vec_print(sc->native_lambda, buf, FALSE);
1031 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1032 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1033 deltag, lambda, buf2, lambda, buf);
1037 lambda_vec_print(sc->native_lambda, buf, FALSE);
1038 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1046 xvgr_legend(fp, nsets, setnames, oenv);
1049 /* now make the histograms */
1051 /* iterate over all lambdas */
1052 while (bl != bl_head)
1054 sample_coll_t *sc = bl->sc->next;
1056 /* iterate over all samples */
1057 while (sc != bl->sc)
1061 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1064 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1067 for (i = 0; i < nbin; i++)
1069 double xmin = i*dx + minval;
1070 double xmax = (i+1)*dx + minval;
1072 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1091 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1095 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1096 if (lambda->index >= 0)
1098 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1100 if (lambda->dhdl >= 0)
1102 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1107 for (i = 0; i < lambda->lc->N; i++)
1109 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1115 /* create a collection (array) of barres_t object given a ordered linked list
1116 of barlamda_t sample collections */
1117 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1123 gmx_bool dhdl = FALSE;
1124 gmx_bool first = TRUE;
1125 lambda_data_t *bl_head = sd->lb;
1127 /* first count the lambdas */
1129 while (bl != bl_head)
1134 snew(res, nlambda-1);
1136 /* next put the right samples in the res */
1138 bl = bl_head->next->next; /* we start with the second one. */
1139 while (bl != bl_head)
1141 sample_coll_t *sc, *scprev;
1142 barres_t *br = &(res[*nres]);
1143 /* there is always a previous one. we search for that as a foreign
1145 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1146 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1154 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1155 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1159 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1164 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1167 else if (!scprev && !sc)
1169 char descX[STRLEN], descY[STRLEN];
1170 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1171 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1173 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1176 /* normal delta H */
1179 char descX[STRLEN], descY[STRLEN];
1180 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1181 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1182 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1186 char descX[STRLEN], descY[STRLEN];
1187 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1188 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1189 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1201 /* estimate the maximum discretization error */
1202 static double barres_list_max_disc_err(barres_t *res, int nres)
1205 double disc_err = 0.;
1206 double delta_lambda;
1208 for (i = 0; i < nres; i++)
1210 barres_t *br = &(res[i]);
1212 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1213 br->a->native_lambda);
1215 for (j = 0; j < br->a->nsamples; j++)
1217 if (br->a->s[j]->hist)
1220 if (br->a->s[j]->derivative)
1222 Wfac = delta_lambda;
1225 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1228 for (j = 0; j < br->b->nsamples; j++)
1230 if (br->b->s[j]->hist)
1233 if (br->b->s[j]->derivative)
1235 Wfac = delta_lambda;
1237 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1245 /* impose start and end times on a sample collection, updating sample_ranges */
1246 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1250 for (i = 0; i < sc->nsamples; i++)
1252 samples_t *s = sc->s[i];
1253 sample_range_t *r = &(sc->r[i]);
1256 double end_time = s->hist->delta_time*s->hist->sum +
1257 s->hist->start_time;
1258 if (s->hist->start_time < begin_t || end_time > end_t)
1268 if (s->start_time < begin_t)
1270 r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1272 end_time = s->delta_time*s->ndu + s->start_time;
1273 if (end_time > end_t)
1275 r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1281 for (j = 0; j < s->ndu; j++)
1283 if (s->t[j] < begin_t)
1288 if (s->t[j] >= end_t)
1295 if (r->start > r->end)
1301 sample_coll_calc_ntot(sc);
1304 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1306 double first_t, last_t;
1307 double begin_t, end_t;
1309 lambda_data_t *head = sd->lb;
1312 if (begin <= 0 && end < 0)
1317 /* first determine the global start and end times */
1323 sample_coll_t *sc = lc->sc->next;
1324 while (sc != lc->sc)
1326 for (j = 0; j < sc->nsamples; j++)
1328 double start_t, end_t;
1330 start_t = sc->s[j]->start_time;
1331 end_t = sc->s[j]->start_time;
1334 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1340 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1344 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1348 if (start_t < first_t || first_t < 0)
1362 /* calculate the actual times */
1380 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1382 if (begin_t > end_t)
1386 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1388 /* then impose them */
1392 sample_coll_t *sc = lc->sc->next;
1393 while (sc != lc->sc)
1395 sample_coll_impose_times(sc, begin_t, end_t);
1403 /* create subsample i out of ni from an existing sample_coll */
1404 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1405 sample_coll_t *sc_orig,
1412 int64_t ntot_so_far;
1414 *sc = *sc_orig; /* just copy all fields */
1416 /* allocate proprietary memory */
1417 snew(sc->s, sc_orig->nsamples);
1418 snew(sc->r, sc_orig->nsamples);
1420 /* copy the samples */
1421 for (j = 0; j < sc_orig->nsamples; j++)
1423 sc->s[j] = sc_orig->s[j];
1424 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1427 /* now fix start and end fields */
1428 /* the casts avoid possible overflows */
1429 ntot_start = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1430 ntot_end = static_cast<int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1432 for (j = 0; j < sc->nsamples; j++)
1435 int64_t new_start, new_end;
1441 ntot_add = sc->s[j]->hist->sum;
1445 ntot_add = sc->r[j].end - sc->r[j].start;
1453 if (!sc->s[j]->hist)
1455 if (ntot_so_far < ntot_start)
1457 /* adjust starting point */
1458 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1462 new_start = sc->r[j].start;
1464 /* adjust end point */
1465 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1466 if (new_end > sc->r[j].end)
1468 new_end = sc->r[j].end;
1471 /* check if we're in range at all */
1472 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1477 /* and write the new range */
1478 GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1479 GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1480 sc->r[j].start = static_cast<int>(new_start);
1481 sc->r[j].end = static_cast<int>(new_end);
1488 double ntot_start_norm, ntot_end_norm;
1489 /* calculate the amount of overlap of the
1490 desired range (ntot_start -- ntot_end) onto
1491 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1493 /* first calculate normalized bounds
1494 (where 0 is the start of the hist range, and 1 the end) */
1495 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1496 ntot_end_norm = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1498 /* now fix the boundaries */
1499 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1500 ntot_end_norm = std::max(0.0, std::min(1.0, ntot_end_norm));
1502 /* and calculate the overlap */
1503 overlap = ntot_end_norm - ntot_start_norm;
1505 if (overlap > 0.95) /* we allow for 5% slack */
1507 sc->r[j].use = TRUE;
1509 else if (overlap < 0.05)
1511 sc->r[j].use = FALSE;
1519 ntot_so_far += ntot_add;
1521 sample_coll_calc_ntot(sc);
1526 /* calculate minimum and maximum work values in sample collection */
1527 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1528 double *Wmin, double *Wmax)
1532 *Wmin = std::numeric_limits<float>::max();
1533 *Wmax = -std::numeric_limits<float>::max();
1535 for (i = 0; i < sc->nsamples; i++)
1537 samples_t *s = sc->s[i];
1538 sample_range_t *r = &(sc->r[i]);
1543 for (j = r->start; j < r->end; j++)
1545 *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1546 *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1551 int hd = 0; /* determine the histogram direction: */
1553 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1557 dx = s->hist->dx[hd];
1559 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1561 *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1562 *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1563 /* look for the highest value bin with values */
1564 if (s->hist->bin[hd][j] > 0)
1566 *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1567 *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1576 /* Initialize a sim_data structure */
1577 static void sim_data_init(sim_data_t *sd)
1579 /* make linked list */
1580 sd->lb = &(sd->lb_head);
1581 sd->lb->next = sd->lb;
1582 sd->lb->prev = sd->lb;
1584 lambda_components_init(&(sd->lc));
1588 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1595 for (i = 0; i < n; i++)
1597 sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1603 /* calculate the BAR average given a histogram
1605 if type== 0, calculate the best estimate for the average,
1606 if type==-1, calculate the minimum possible value given the histogram
1607 if type== 1, calculate the maximum possible value given the histogram */
1608 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1614 /* normalization factor multiplied with bin width and
1615 number of samples (we normalize through M): */
1617 int hd = 0; /* determine the histogram direction: */
1620 if ( (hist->nhist > 1) && (Wfac < 0) )
1625 maxbin = hist->nbin[hd]-1;
1628 maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1631 for (i = 0; i < maxbin; i++)
1633 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1634 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1636 sum += pxdx/(1. + std::exp(x + sbMmDG));
1642 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1643 double temp, double tol, int type)
1647 double Wfac1, Wfac2, Wmin, Wmax;
1648 double DG0, DG1, DG2, dDG1;
1649 double n1, n2; /* numbers of samples as doubles */
1654 /* count the numbers of samples */
1658 M = std::log(n1/n2);
1660 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1661 if (ca->foreign_lambda->dhdl < 0)
1663 /* this is the case when the delta U were calculated directly
1664 (i.e. we're not scaling dhdl) */
1670 /* we're using dhdl, so delta_lambda needs to be a
1671 multiplication factor. */
1672 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1673 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1675 if (cb->native_lambda->lc->N > 1)
1678 "Can't (yet) do multi-component dhdl interpolation");
1681 Wfac1 = beta*delta_lambda;
1682 Wfac2 = -beta*delta_lambda;
1687 /* We print the output both in kT and kJ/mol.
1688 * Here we determine DG in kT, so when beta < 1
1689 * the precision has to be increased.
1694 /* Calculate minimum and maximum work to give an initial estimate of
1695 * delta G as their average.
1698 double Wmin1, Wmin2, Wmax1, Wmax2;
1699 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1700 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1702 Wmin = std::min(Wmin1, Wmin2);
1703 Wmax = std::max(Wmax1, Wmax2);
1711 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1713 /* We approximate by bisection: given our initial estimates
1714 we keep checking whether the halfway point is greater or
1715 smaller than what we get out of the BAR averages.
1717 For the comparison we can use twice the tolerance. */
1718 while (DG2 - DG0 > 2*tol)
1720 DG1 = 0.5*(DG0 + DG2);
1722 /* calculate the BAR averages */
1725 for (i = 0; i < ca->nsamples; i++)
1727 samples_t *s = ca->s[i];
1728 sample_range_t *r = &(ca->r[i]);
1733 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1737 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1742 for (i = 0; i < cb->nsamples; i++)
1744 samples_t *s = cb->s[i];
1745 sample_range_t *r = &(cb->r[i]);
1750 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1754 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1770 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1774 return 0.5*(DG0 + DG2);
1777 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1778 double temp, double dg, double *sa, double *sb)
1784 double Wfac1, Wfac2;
1790 /* count the numbers of samples */
1794 /* to ensure the work values are the same as during the delta_G */
1795 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1796 if (ca->foreign_lambda->dhdl < 0)
1798 /* this is the case when the delta U were calculated directly
1799 (i.e. we're not scaling dhdl) */
1805 /* we're using dhdl, so delta_lambda needs to be a
1806 multiplication factor. */
1807 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1809 Wfac1 = beta*delta_lambda;
1810 Wfac2 = -beta*delta_lambda;
1813 /* first calculate the average work in both directions */
1814 for (i = 0; i < ca->nsamples; i++)
1816 samples_t *s = ca->s[i];
1817 sample_range_t *r = &(ca->r[i]);
1822 for (j = r->start; j < r->end; j++)
1824 W_ab += Wfac1*s->du[j];
1829 /* normalization factor multiplied with bin width and
1830 number of samples (we normalize through M): */
1833 int hd = 0; /* histogram direction */
1834 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1838 dx = s->hist->dx[hd];
1840 for (j = 0; j < s->hist->nbin[0]; j++)
1842 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1843 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1851 for (i = 0; i < cb->nsamples; i++)
1853 samples_t *s = cb->s[i];
1854 sample_range_t *r = &(cb->r[i]);
1859 for (j = r->start; j < r->end; j++)
1861 W_ba += Wfac1*s->du[j];
1866 /* normalization factor multiplied with bin width and
1867 number of samples (we normalize through M): */
1870 int hd = 0; /* histogram direction */
1871 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1875 dx = s->hist->dx[hd];
1877 for (j = 0; j < s->hist->nbin[0]; j++)
1879 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1880 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1888 /* then calculate the relative entropies */
1893 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1894 double temp, double dg, double *stddev)
1898 double sigmafact = 0.;
1900 double Wfac1, Wfac2;
1906 /* count the numbers of samples */
1910 /* to ensure the work values are the same as during the delta_G */
1911 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1912 if (ca->foreign_lambda->dhdl < 0)
1914 /* this is the case when the delta U were calculated directly
1915 (i.e. we're not scaling dhdl) */
1921 /* we're using dhdl, so delta_lambda needs to be a
1922 multiplication factor. */
1923 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1925 Wfac1 = beta*delta_lambda;
1926 Wfac2 = -beta*delta_lambda;
1929 M = std::log(n1/n2);
1932 /* calculate average in both directions */
1933 for (i = 0; i < ca->nsamples; i++)
1935 samples_t *s = ca->s[i];
1936 sample_range_t *r = &(ca->r[i]);
1941 for (j = r->start; j < r->end; j++)
1943 sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1948 /* normalization factor multiplied with bin width and
1949 number of samples (we normalize through M): */
1952 int hd = 0; /* histogram direction */
1953 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1957 dx = s->hist->dx[hd];
1959 for (j = 0; j < s->hist->nbin[0]; j++)
1961 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1962 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1964 sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1969 for (i = 0; i < cb->nsamples; i++)
1971 samples_t *s = cb->s[i];
1972 sample_range_t *r = &(cb->r[i]);
1977 for (j = r->start; j < r->end; j++)
1979 sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1984 /* normalization factor multiplied with bin width and
1985 number of samples (we normalize through M): */
1988 int hd = 0; /* histogram direction */
1989 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1993 dx = s->hist->dx[hd];
1995 for (j = 0; j < s->hist->nbin[0]; j++)
1997 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1998 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2000 sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2006 sigmafact /= (n1 + n2);
2010 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2011 *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2016 static void calc_bar(barres_t *br, double tol,
2017 int npee_min, int npee_max, gmx_bool *bEE,
2021 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2022 for calculated quantities */
2023 double temp = br->a->temp;
2025 double dg_min, dg_max;
2026 gmx_bool have_hist = FALSE;
2028 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2030 br->dg_disc_err = 0.;
2031 br->dg_histrange_err = 0.;
2033 /* check if there are histograms */
2034 for (i = 0; i < br->a->nsamples; i++)
2036 if (br->a->r[i].use && br->a->s[i]->hist)
2044 for (i = 0; i < br->b->nsamples; i++)
2046 if (br->b->r[i].use && br->b->s[i]->hist)
2054 /* calculate histogram-specific errors */
2057 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2058 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2060 if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2062 /* the histogram range error is the biggest of the differences
2063 between the best estimate and the extremes */
2064 br->dg_histrange_err = std::abs(dg_max - dg_min);
2066 br->dg_disc_err = 0.;
2067 for (i = 0; i < br->a->nsamples; i++)
2069 if (br->a->s[i]->hist)
2071 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2074 for (i = 0; i < br->b->nsamples; i++)
2076 if (br->b->s[i]->hist)
2078 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2082 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2084 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2093 sample_coll_t ca, cb;
2095 /* initialize the samples */
2096 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2098 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2101 for (npee = npee_min; npee <= npee_max; npee++)
2110 double dstddev2 = 0;
2113 for (p = 0; p < npee; p++)
2120 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2121 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2125 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2129 sample_coll_destroy(&ca);
2133 sample_coll_destroy(&cb);
2138 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2142 partsum[npee*(npee_max+1)+p] += dgp;
2144 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2149 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2152 dstddev2 += stddevc*stddevc;
2154 sample_coll_destroy(&ca);
2155 sample_coll_destroy(&cb);
2159 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2165 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2166 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2170 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2172 br->dg_err = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2173 br->sa_err = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2174 br->sb_err = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2175 br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2180 static double bar_err(int nbmin, int nbmax, const double *partsum)
2183 double svar, s, s2, dg;
2186 for (nb = nbmin; nb <= nbmax; nb++)
2190 for (b = 0; b < nb; b++)
2192 dg = partsum[nb*(nbmax+1)+b];
2198 svar += (s2 - s*s)/(nb - 1);
2201 return std::sqrt(svar/(nbmax + 1 - nbmin));
2205 /* Seek the end of an identifier (consecutive non-spaces), followed by
2206 an optional number of spaces or '='-signs. Returns a pointer to the
2207 first non-space value found after that. Returns NULL if the string
2210 static const char *find_value(const char *str)
2212 gmx_bool name_end_found = FALSE;
2214 /* if the string is a NULL pointer, return a NULL pointer. */
2219 while (*str != '\0')
2221 /* first find the end of the name */
2222 if (!name_end_found)
2224 if (std::isspace(*str) || (*str == '=') )
2226 name_end_found = TRUE;
2231 if (!( std::isspace(*str) || (*str == '=') ))
2242 /* read a vector-notation description of a lambda vector */
2243 static gmx_bool read_lambda_compvec(const char *str,
2245 const lambda_components_t *lc_in,
2246 lambda_components_t *lc_out,
2250 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2251 components, or to check them */
2252 gmx_bool start_reached = FALSE; /* whether the start of component names
2254 gmx_bool vector = FALSE; /* whether there are multiple components */
2255 int n = 0; /* current component number */
2256 const char *val_start = nullptr; /* start of the component name, or NULL
2257 if not in a value */
2267 if (lc_out && lc_out->N == 0)
2269 initialize_lc = TRUE;
2272 if (lc_in == nullptr)
2281 if (std::isalnum(*str))
2284 start_reached = TRUE;
2287 else if (*str == '(')
2290 start_reached = TRUE;
2292 else if (!std::isspace(*str))
2294 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2301 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2308 lambda_components_add(lc_out, val_start,
2313 if (!lambda_components_check(lc_out, n, val_start,
2322 /* add a vector component to lv */
2323 lv->val[n] = strtod(val_start, &strtod_end);
2324 if (val_start == strtod_end)
2327 "Error reading lambda vector in %s", fn);
2330 /* reset for the next identifier */
2331 val_start = nullptr;
2339 else if (std::isalnum(*str))
2352 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2356 GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2361 else if (lv == nullptr)
2367 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2387 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2393 /* read and check the component names from a string */
2394 static gmx_bool read_lambda_components(const char *str,
2395 lambda_components_t *lc,
2399 return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2402 /* read an initialized lambda vector from a string */
2403 static gmx_bool read_lambda_vector(const char *str,
2408 return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2413 /* deduce lambda value from legend.
2415 legend = the legend string
2417 lam = the initialized lambda vector
2418 returns whether to use the data in this set.
2420 static gmx_bool legend2lambda(const char *fn,
2424 const char *ptr = nullptr, *ptr2 = nullptr;
2425 gmx_bool ok = FALSE;
2426 gmx_bool bdhdl = FALSE;
2427 const char *tostr = " to ";
2429 if (legend == nullptr)
2431 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2434 /* look for the last 'to': */
2438 ptr2 = std::strstr(ptr2, tostr);
2439 if (ptr2 != nullptr)
2445 while (ptr2 != nullptr && *ptr2 != '\0');
2449 ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2453 /* look for the = sign */
2454 ptr = std::strrchr(legend, '=');
2457 /* otherwise look for the last space */
2458 ptr = std::strrchr(legend, ' ');
2462 if (std::strstr(legend, "dH"))
2467 else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2472 else /*if (std::strstr(legend, "pV"))*/
2483 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2487 ptr = find_value(ptr);
2488 if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2490 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2498 ptr = std::strrchr(legend, '=');
2502 /* there must be a component name */
2506 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2508 /* now backtrack to the start of the identifier */
2509 while (isspace(*ptr))
2515 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2518 while (!std::isspace(*ptr))
2523 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2527 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2531 std::strncpy(buf, ptr, (end-ptr));
2532 buf[(end-ptr)] = '\0';
2534 "Did not find lambda component for '%s' in %s",
2543 "dhdl without component name with >1 lambda component in %s",
2548 lam->dhdl = dhdl_index;
2553 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2554 lambda_components_t *lc)
2559 double native_lambda;
2563 /* first check for a state string */
2564 ptr = std::strstr(subtitle, "state");
2568 const char *val_end;
2570 /* the new 4.6 style lambda vectors */
2571 ptr = find_value(ptr);
2574 index = std::strtol(ptr, &end, 10);
2577 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2584 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2587 /* now find the lambda vector component names */
2588 while (*ptr != '(' && !std::isalnum(*ptr))
2594 "Incomplete lambda vector component data in %s", fn);
2599 if (!read_lambda_components(ptr, lc, &val_end, fn))
2602 "lambda vector components in %s don't match those previously read",
2605 ptr = find_value(val_end);
2608 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2611 lambda_vec_init(&(ba->native_lambda), lc);
2612 if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2614 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2616 ba->native_lambda.index = index;
2621 /* compatibility mode: check for lambda in other ways. */
2622 /* plain text lambda string */
2623 ptr = std::strstr(subtitle, "lambda");
2626 /* xmgrace formatted lambda string */
2627 ptr = std::strstr(subtitle, "\\xl\\f{}");
2631 /* xmgr formatted lambda string */
2632 ptr = std::strstr(subtitle, "\\8l\\4");
2636 ptr = std::strstr(ptr, "=");
2640 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2641 /* add the lambda component name as an empty string */
2644 if (!lambda_components_check(lc, 0, "", 0))
2647 "lambda vector components in %s don't match those previously read",
2653 lambda_components_add(lc, "", 0);
2655 lambda_vec_init(&(ba->native_lambda), lc);
2656 ba->native_lambda.val[0] = native_lambda;
2663 static void read_bar_xvg_lowlevel(const char *fn, const real *temp, xvg_t *ba,
2664 lambda_components_t *lc)
2667 char *subtitle, **legend, *ptr;
2669 gmx_bool native_lambda_read = FALSE;
2676 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2679 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2681 /* Reorder the data */
2683 for (i = 1; i < ba->nset; i++)
2685 ba->y[i-1] = ba->y[i];
2689 snew(ba->np, ba->nset);
2690 for (i = 0; i < ba->nset; i++)
2696 if (subtitle != nullptr)
2698 /* try to extract temperature */
2699 ptr = std::strstr(subtitle, "T =");
2703 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2707 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2717 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2722 /* Try to deduce lambda from the subtitle */
2725 if (subtitle2lambda(subtitle, ba, fn, lc))
2727 native_lambda_read = TRUE;
2730 snew(ba->lambda, ba->nset);
2731 if (legend == nullptr)
2733 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2736 ba->lambda[0] = ba->native_lambda;
2740 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2745 for (i = 0; i < ba->nset; )
2747 /* Read lambda from the legend */
2748 lambda_vec_init( &(ba->lambda[i]), lc );
2749 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2750 gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2753 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2759 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2760 for (j = i+1; j < ba->nset; j++)
2762 ba->y[j-1] = ba->y[j];
2763 legend[j-1] = legend[j];
2770 if (!native_lambda_read)
2772 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2775 if (legend != nullptr)
2777 for (i = 0; i < ba->nset-1; i++)
2785 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2793 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2795 if (barsim->nset < 1)
2797 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2800 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2802 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2804 *temp = barsim->temp;
2806 /* now create a series of samples_t */
2807 snew(s, barsim->nset);
2808 for (i = 0; i < barsim->nset; i++)
2810 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2811 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2812 &(barsim->lambda[i])),
2814 s[i].du = barsim->y[i];
2815 s[i].ndu = barsim->np[i];
2818 lambda_data_list_insert_sample(sd->lb, s+i);
2823 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2824 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2825 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2826 for (i = 0; i < barsim->nset; i++)
2828 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2829 printf(" %s (%d pts)\n", buf, s[i].ndu);
2835 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2836 double start_time, double delta_time,
2837 lambda_vec_t *native_lambda, double temp,
2838 double *last_t, const char *filename)
2841 lambda_vec_t *foreign_lambda;
2843 samples_t *s; /* convenience pointer */
2846 /* check the block types etc. */
2847 if ( (blk->nsub < 3) ||
2848 (blk->sub[0].type != xdr_datatype_int) ||
2849 (blk->sub[1].type != xdr_datatype_double) ||
2851 (blk->sub[2].type != xdr_datatype_float) &&
2852 (blk->sub[2].type != xdr_datatype_double)
2854 (blk->sub[0].nr < 1) ||
2855 (blk->sub[1].nr < 1) )
2858 "Unexpected/corrupted block data in file %s around time %f.",
2859 filename, start_time);
2862 snew(foreign_lambda, 1);
2863 lambda_vec_init(foreign_lambda, native_lambda->lc);
2864 lambda_vec_copy(foreign_lambda, native_lambda);
2865 type = blk->sub[0].ival[0];
2868 for (i = 0; i < native_lambda->lc->N; i++)
2870 foreign_lambda->val[i] = blk->sub[1].dval[i];
2875 if (blk->sub[0].nr > 1)
2877 foreign_lambda->dhdl = blk->sub[0].ival[1];
2881 foreign_lambda->dhdl = 0;
2887 /* initialize the samples structure if it's empty. */
2889 samples_init(*smp, native_lambda, foreign_lambda, temp,
2890 type == dhbtDHDL, filename);
2891 (*smp)->start_time = start_time;
2892 (*smp)->delta_time = delta_time;
2895 /* set convenience pointer */
2898 /* now double check */
2899 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2901 char buf[STRLEN], buf2[STRLEN];
2902 lambda_vec_print(foreign_lambda, buf, FALSE);
2903 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2904 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2905 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2906 filename, start_time);
2909 /* make room for the data */
2910 if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2912 s->ndu_alloc += (s->ndu_alloc < static_cast<size_t>(blk->sub[2].nr)) ?
2913 blk->sub[2].nr*2 : s->ndu_alloc;
2914 srenew(s->du_alloc, s->ndu_alloc);
2915 s->du = s->du_alloc;
2918 s->ndu += blk->sub[2].nr;
2919 s->ntot += blk->sub[2].nr;
2920 *ndu = blk->sub[2].nr;
2922 /* and copy the data*/
2923 for (j = 0; j < blk->sub[2].nr; j++)
2925 if (blk->sub[2].type == xdr_datatype_float)
2927 s->du[startj+j] = blk->sub[2].fval[j];
2931 s->du[startj+j] = blk->sub[2].dval[j];
2934 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2936 *last_t = start_time + blk->sub[2].nr*delta_time;
2940 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2941 double start_time, double delta_time,
2942 lambda_vec_t *native_lambda, double temp,
2943 double *last_t, const char *filename)
2948 lambda_vec_t *foreign_lambda;
2952 /* check the block types etc. */
2953 if ( (blk->nsub < 2) ||
2954 (blk->sub[0].type != xdr_datatype_double) ||
2955 (blk->sub[1].type != xdr_datatype_int64) ||
2956 (blk->sub[0].nr < 2) ||
2957 (blk->sub[1].nr < 2) )
2960 "Unexpected/corrupted block data in file %s around time %f",
2961 filename, start_time);
2964 nhist = blk->nsub-2;
2972 "Unexpected/corrupted block data in file %s around time %f",
2973 filename, start_time);
2979 snew(foreign_lambda, 1);
2980 lambda_vec_init(foreign_lambda, native_lambda->lc);
2981 lambda_vec_copy(foreign_lambda, native_lambda);
2982 type = static_cast<int>(blk->sub[1].lval[1]);
2985 double old_foreign_lambda;
2987 old_foreign_lambda = blk->sub[0].dval[0];
2988 if (old_foreign_lambda >= 0)
2990 foreign_lambda->val[0] = old_foreign_lambda;
2991 if (foreign_lambda->lc->N > 1)
2994 "Single-component lambda in multi-component file %s",
3000 for (i = 0; i < native_lambda->lc->N; i++)
3002 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3008 if (foreign_lambda->lc->N > 1)
3010 if (blk->sub[1].nr < 3 + nhist)
3013 "Missing derivative coord in multi-component file %s",
3016 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3020 foreign_lambda->dhdl = 0;
3024 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3028 for (i = 0; i < nhist; i++)
3030 nbins[i] = blk->sub[i+2].nr;
3033 hist_init(s->hist, nhist, nbins);
3035 for (i = 0; i < nhist; i++)
3037 s->hist->x0[i] = blk->sub[1].lval[2+i];
3038 s->hist->dx[i] = blk->sub[0].dval[1];
3041 s->hist->dx[i] = -s->hist->dx[i];
3045 s->hist->start_time = start_time;
3046 s->hist->delta_time = delta_time;
3047 s->start_time = start_time;
3048 s->delta_time = delta_time;
3050 for (i = 0; i < nhist; i++)
3054 for (j = 0; j < s->hist->nbin[i]; j++)
3056 int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3058 s->hist->bin[i][j] = binv;
3071 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3077 if (start_time + s->hist->sum*delta_time > *last_t)
3079 *last_t = start_time + s->hist->sum*delta_time;
3085 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3091 gmx_enxnm_t *enm = nullptr;
3092 double first_t = -1;
3094 samples_t **samples_rawdh = nullptr; /* contains samples for raw delta_h */
3095 int *nhists = nullptr; /* array to keep count & print at end */
3096 int *npts = nullptr; /* array to keep count & print at end */
3097 lambda_vec_t **lambdas = nullptr; /* array to keep count & print at end */
3098 lambda_vec_t *native_lambda;
3100 lambda_vec_t start_lambda;
3102 fp = open_enx(fn, "r");
3103 do_enxnms(fp, &nre, &enm);
3106 snew(native_lambda, 1);
3107 start_lambda.lc = nullptr;
3108 start_lambda.val = nullptr;
3110 while (do_enx(fp, fr))
3112 /* count the data blocks */
3113 int nblocks_raw = 0;
3114 int nblocks_hist = 0;
3117 /* DHCOLL block information: */
3118 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3121 /* count the blocks and handle collection information: */
3122 for (i = 0; i < fr->nblock; i++)
3124 if (fr->block[i].id == enxDHHIST)
3128 if (fr->block[i].id == enxDH)
3132 if (fr->block[i].id == enxDHCOLL)
3135 if ( (fr->block[i].nsub < 1) ||
3136 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3137 (fr->block[i].sub[0].nr < 5))
3139 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3142 /* read the data from the DHCOLL block */
3143 rtemp = fr->block[i].sub[0].dval[0];
3144 start_time = fr->block[i].sub[0].dval[1];
3145 delta_time = fr->block[i].sub[0].dval[2];
3146 old_start_lambda = fr->block[i].sub[0].dval[3];
3147 delta_lambda = fr->block[i].sub[0].dval[4];
3149 if (delta_lambda > 0)
3151 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3153 if ( ( *temp != rtemp) && (*temp > 0) )
3155 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3159 if (old_start_lambda >= 0)
3163 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3166 "lambda vector components in %s don't match those previously read",
3172 lambda_components_add(&(sd->lc), "", 0);
3174 if (!start_lambda.lc)
3176 lambda_vec_init(&start_lambda, &(sd->lc));
3178 start_lambda.val[0] = old_start_lambda;
3182 /* read lambda vector */
3184 gmx_bool check = (sd->lc.N > 0);
3185 if (fr->block[i].nsub < 2)
3188 "No lambda vector, but start_lambda=%f\n",
3191 n_lambda_vec = fr->block[i].sub[1].ival[1];
3192 for (j = 0; j < n_lambda_vec; j++)
3195 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3198 /* check the components */
3199 lambda_components_check(&(sd->lc), j, name,
3204 lambda_components_add(&(sd->lc), name,
3208 lambda_vec_init(&start_lambda, &(sd->lc));
3209 start_lambda.index = fr->block[i].sub[1].ival[0];
3210 for (j = 0; j < n_lambda_vec; j++)
3212 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3217 first_t = start_time;
3224 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3226 if (nblocks_raw > 0 && nblocks_hist > 0)
3228 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3233 /* this is the first round; allocate the associated data
3235 /*native_lambda=start_lambda;*/
3236 lambda_vec_init(native_lambda, &(sd->lc));
3237 lambda_vec_copy(native_lambda, &start_lambda);
3238 nsamples = nblocks_raw+nblocks_hist;
3239 snew(nhists, nsamples);
3240 snew(npts, nsamples);
3241 snew(lambdas, nsamples);
3242 snew(samples_rawdh, nsamples);
3243 for (i = 0; i < nsamples; i++)
3247 lambdas[i] = nullptr;
3248 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3249 ones contain values */
3254 // nsamples > 0 means this is NOT the first iteration
3256 /* check the native lambda */
3257 if (!lambda_vec_same(&start_lambda, native_lambda) )
3259 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3260 fn, native_lambda->val[0], start_lambda.val[0], start_time);
3262 /* check the number of samples against the previous number */
3263 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3265 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3266 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3268 /* check whether last iterations's end time matches with
3269 the currrent start time */
3270 if ( (std::abs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3272 /* it didn't. We need to store our samples and reallocate */
3273 for (i = 0; i < nsamples; i++)
3275 if (samples_rawdh[i])
3277 /* insert it into the existing list */
3278 lambda_data_list_insert_sample(sd->lb,
3280 /* and make sure we'll allocate a new one this time
3282 samples_rawdh[i] = nullptr;
3289 k = 0; /* counter for the lambdas, etc. arrays */
3290 for (i = 0; i < fr->nblock; i++)
3292 if (fr->block[i].id == enxDH)
3294 int type = (fr->block[i].sub[0].ival[0]);
3295 if (type == dhbtDH || type == dhbtDHDL)
3298 read_edr_rawdh_block(&(samples_rawdh[k]),
3301 start_time, delta_time,
3302 native_lambda, rtemp,
3305 if (samples_rawdh[k])
3307 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3312 else if (fr->block[i].id == enxDHHIST)
3314 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3315 if (type == dhbtDH || type == dhbtDHDL)
3319 samples_t *s; /* this is where the data will go */
3320 s = read_edr_hist_block(&nb, &(fr->block[i]),
3321 start_time, delta_time,
3322 native_lambda, rtemp,
3327 lambdas[k] = s->foreign_lambda;
3330 /* and insert the new sample immediately */
3331 for (j = 0; j < nb; j++)
3333 lambda_data_list_insert_sample(sd->lb, s+j);
3339 /* Now store all our extant sample collections */
3340 for (i = 0; i < nsamples; i++)
3342 if (samples_rawdh[i])
3344 /* insert it into the existing list */
3345 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3353 lambda_vec_print(native_lambda, buf, FALSE);
3354 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3355 fn, first_t, last_t, buf);
3356 for (i = 0; i < nsamples; i++)
3360 lambda_vec_print(lambdas[i], buf, TRUE);
3363 printf(" %s (%d hists)\n", buf, nhists[i]);
3367 printf(" %s (%d pts)\n", buf, npts[i]);
3379 int gmx_bar(int argc, char *argv[])
3381 static const char *desc[] = {
3382 "[THISMODULE] calculates free energy difference estimates through ",
3383 "Bennett's acceptance ratio method (BAR). It also automatically",
3384 "adds series of individual free energies obtained with BAR into",
3385 "a combined free energy estimate.[PAR]",
3387 "Every individual BAR free energy difference relies on two ",
3388 "simulations at different states: say state A and state B, as",
3389 "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3390 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3391 "average of the Hamiltonian difference of state B given state A and",
3393 "The energy differences to the other state must be calculated",
3394 "explicitly during the simulation. This can be done with",
3395 "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3397 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3398 "Two types of input files are supported:",
3400 " * Files with more than one [IT]y[it]-value. ",
3401 " The files should have columns ",
3402 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3403 " The [GRK]lambda[grk] values are inferred ",
3404 " from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3405 " dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3406 " legends of Delta H",
3407 " * Files with only one [IT]y[it]-value. Using the",
3408 " [TT]-extp[tt] option for these files, it is assumed",
3409 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3410 " Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3411 " The [GRK]lambda[grk] value of the simulation is inferred from the ",
3412 " subtitle (if present), otherwise from a number in the subdirectory ",
3413 " in the file name.",
3416 "The [GRK]lambda[grk] of the simulation is parsed from ",
3417 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3418 "foreign [GRK]lambda[grk] values from the legend containing the ",
3419 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3420 "the legend line containing 'T ='.[PAR]",
3422 "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3423 "These can contain either lists of energy differences (see the ",
3424 "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3425 "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3426 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3427 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3429 "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3430 "the energy difference can also be extrapolated from the ",
3431 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3432 "option, which assumes that the system's Hamiltonian depends linearly",
3433 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3435 "The free energy estimates are determined using BAR with bisection, ",
3436 "with the precision of the output set with [TT]-prec[tt]. ",
3437 "An error estimate taking into account time correlations ",
3438 "is made by splitting the data into blocks and determining ",
3439 "the free energy differences over those blocks and assuming ",
3440 "the blocks are independent. ",
3441 "The final error estimate is determined from the average variance ",
3442 "over 5 blocks. A range of block numbers for error estimation can ",
3443 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3445 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3446 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3447 "samples. [BB]Note[bb] that when aggregating energy ",
3448 "differences/derivatives with different sampling intervals, this is ",
3449 "almost certainly not correct. Usually subsequent energies are ",
3450 "correlated and different time intervals mean different degrees ",
3451 "of correlation between samples.[PAR]",
3453 "The results are split in two parts: the last part contains the final ",
3454 "results in kJ/mol, together with the error estimate for each part ",
3455 "and the total. The first part contains detailed free energy ",
3456 "difference estimates and phase space overlap measures in units of ",
3457 "kT (together with their computed error estimate). The printed ",
3460 " * lam_A: the [GRK]lambda[grk] values for point A.",
3461 " * lam_B: the [GRK]lambda[grk] values for point B.",
3462 " * DG: the free energy estimate.",
3463 " * s_A: an estimate of the relative entropy of B in A.",
3464 " * s_B: an estimate of the relative entropy of A in B.",
3465 " * stdev: an estimate expected per-sample standard deviation.",
3468 "The relative entropy of both states in each other's ensemble can be ",
3469 "interpreted as a measure of phase space overlap: ",
3470 "the relative entropy s_A of the work samples of lambda_B in the ",
3471 "ensemble of lambda_A (and vice versa for s_B), is a ",
3472 "measure of the 'distance' between Boltzmann distributions of ",
3473 "the two states, that goes to zero for identical distributions. See ",
3474 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3476 "The estimate of the expected per-sample standard deviation, as given ",
3477 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3478 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3479 "of the actual statistical error, because it assumes independent samples).[PAR]",
3481 "To get a visual estimate of the phase space overlap, use the ",
3482 "[TT]-oh[tt] option to write series of histograms, together with the ",
3483 "[TT]-nbin[tt] option.[PAR]"
3485 static real begin = 0, end = -1, temp = -1;
3486 int nd = 2, nbmin = 5, nbmax = 5;
3488 gmx_bool use_dhdl = FALSE;
3490 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3491 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3492 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3493 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3494 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3495 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3496 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3497 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3501 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3502 { efEDR, "-g", "ener", ffOPTRDMULT },
3503 { efXVG, "-o", "bar", ffOPTWR },
3504 { efXVG, "-oi", "barint", ffOPTWR },
3505 { efXVG, "-oh", "histogram", ffOPTWR }
3507 #define NFILE asize(fnm)
3510 int nf = 0; /* file counter */
3511 int nfile_tot; /* total number of input files */
3512 sim_data_t sim_data; /* the simulation data */
3513 barres_t *results; /* the results */
3514 int nresults; /* number of results in results array */
3517 double prec, dg_tot;
3519 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3520 char buf[STRLEN], buf2[STRLEN];
3521 char ktformat[STRLEN], sktformat[STRLEN];
3522 char kteformat[STRLEN], skteformat[STRLEN];
3523 gmx_output_env_t *oenv;
3525 gmx_bool result_OK = TRUE, bEE = TRUE;
3527 gmx_bool disc_err = FALSE;
3528 double sum_disc_err = 0.; /* discretization error */
3529 gmx_bool histrange_err = FALSE;
3530 double sum_histrange_err = 0.; /* histogram range error */
3531 double stat_err = 0.; /* statistical error */
3533 if (!parse_common_args(&argc, argv,
3535 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3540 gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3541 gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3543 sim_data_init(&sim_data);
3545 /* make linked list */
3547 lambda_data_init(lb, 0, 0);
3553 nfile_tot = xvgFiles.size() + edrFiles.size();
3557 gmx_fatal(FARGS, "No input files!");
3562 gmx_fatal(FARGS, "Can not have negative number of digits");
3564 prec = std::pow(10.0, static_cast<double>(-nd));
3566 snew(partsum, (nbmax+1)*(nbmax+1));
3569 /* read in all files. First xvg files */
3570 for (const std::string &filenm : xvgFiles)
3572 read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3575 /* then .edr files */
3576 for (const std::string &filenm : edrFiles)
3578 read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3582 /* fix the times to allow for equilibration */
3583 sim_data_impose_times(&sim_data, begin, end);
3585 if (opt2bSet("-oh", NFILE, fnm))
3587 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3590 /* assemble the output structures from the lambdas */
3591 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3593 sum_disc_err = barres_list_max_disc_err(results, nresults);
3597 printf("\nNo results to calculate.\n");
3601 if (sum_disc_err > prec)
3603 prec = sum_disc_err;
3604 nd = static_cast<int>(std::ceil(-std::log10(prec)));
3605 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3609 /*sprintf(lamformat,"%%6.3f");*/
3610 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3611 /* the format strings of the results in kT */
3612 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3613 sprintf( sktformat, "%%%ds", 6+nd);
3614 /* the format strings of the errors in kT */
3615 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3616 sprintf( skteformat, "%%%ds", 4+nd);
3617 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3618 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3623 if (opt2bSet("-o", NFILE, fnm))
3625 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3626 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3627 "\\lambda", buf, exvggtXYDY, oenv);
3631 if (opt2bSet("-oi", NFILE, fnm))
3633 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3634 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3635 "\\lambda", buf, oenv);
3645 /* first calculate results */
3648 for (f = 0; f < nresults; f++)
3650 /* Determine the free energy difference with a factor of 10
3651 * more accuracy than requested for printing.
3653 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3656 if (results[f].dg_disc_err > prec/10.)
3660 if (results[f].dg_histrange_err > prec/10.)
3662 histrange_err = TRUE;
3666 /* print results in kT */
3669 printf("\nTemperature: %g K\n", temp);
3671 printf("\nDetailed results in kT (see help for explanation):\n\n");
3672 printf("%6s ", " lam_A");
3673 printf("%6s ", " lam_B");
3674 printf(sktformat, "DG ");
3677 printf(skteformat, "+/- ");
3681 printf(skteformat, "disc ");
3685 printf(skteformat, "range ");
3687 printf(sktformat, "s_A ");
3690 printf(skteformat, "+/- " );
3692 printf(sktformat, "s_B ");
3695 printf(skteformat, "+/- " );
3697 printf(sktformat, "stdev ");
3700 printf(skteformat, "+/- ");
3703 for (f = 0; f < nresults; f++)
3705 lambda_vec_print_short(results[f].a->native_lambda, buf);
3707 lambda_vec_print_short(results[f].b->native_lambda, buf);
3709 printf(ktformat, results[f].dg);
3713 printf(kteformat, results[f].dg_err);
3718 printf(kteformat, results[f].dg_disc_err);
3723 printf(kteformat, results[f].dg_histrange_err);
3726 printf(ktformat, results[f].sa);
3730 printf(kteformat, results[f].sa_err);
3733 printf(ktformat, results[f].sb);
3737 printf(kteformat, results[f].sb_err);
3740 printf(ktformat, results[f].dg_stddev);
3744 printf(kteformat, results[f].dg_stddev_err);
3748 /* Check for negative relative entropy with a 95% certainty. */
3749 if (results[f].sa < -2*results[f].sa_err ||
3750 results[f].sb < -2*results[f].sb_err)
3758 printf("\nWARNING: Some of these results violate the Second Law of "
3759 "Thermodynamics: \n"
3760 " This is can be the result of severe undersampling, or "
3762 " there is something wrong with the simulations.\n");
3766 /* final results in kJ/mol */
3767 printf("\n\nFinal results in kJ/mol:\n\n");
3769 for (f = 0; f < nresults; f++)
3774 lambda_vec_print_short(results[f].a->native_lambda, buf);
3775 fprintf(fpi, xvg2format, buf, dg_tot);
3781 lambda_vec_print_intermediate(results[f].a->native_lambda,
3782 results[f].b->native_lambda,
3785 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3789 lambda_vec_print_short(results[f].a->native_lambda, buf);
3790 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3791 printf("%s - %s", buf, buf2);
3794 printf(dgformat, results[f].dg*kT);
3798 printf(dgformat, results[f].dg_err*kT);
3802 printf(" (max. range err. = ");
3803 printf(dgformat, results[f].dg_histrange_err*kT);
3805 sum_histrange_err += results[f].dg_histrange_err*kT;
3809 dg_tot += results[f].dg;
3813 lambda_vec_print_short(results[0].a->native_lambda, buf);
3814 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3815 printf("%s - %s", buf, buf2);
3818 printf(dgformat, dg_tot*kT);
3821 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3823 printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3828 printf("\nmaximum discretization error = ");
3829 printf(dgformat, sum_disc_err);
3830 if (bEE && stat_err < sum_disc_err)
3832 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3837 printf("\nmaximum histogram range error = ");
3838 printf(dgformat, sum_histrange_err);
3839 if (bEE && stat_err < sum_histrange_err)
3841 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3850 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3851 fprintf(fpi, xvg2format, buf, dg_tot);
3859 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3860 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");