2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/enxio.h"
48 #include "gromacs/fileio/xvgr.h"
49 #include "gromacs/gmxana/gmx_ana.h"
50 #include "gromacs/math/units.h"
51 #include "gromacs/math/utilities.h"
52 #include "gromacs/mdlib/mdebin.h"
53 #include "gromacs/mdtypes/md_enums.h"
54 #include "gromacs/utility/arraysize.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/dir_separator.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/gmxassert.h"
59 #include "gromacs/utility/smalloc.h"
60 #include "gromacs/utility/snprintf.h"
63 /* Structure for the names of lambda vector components */
64 typedef struct lambda_components_t
66 char **names; /* Array of strings with names for the lambda vector
68 int N; /* The number of components */
69 int Nalloc; /* The number of allocated components */
70 } lambda_components_t;
72 /* Structure for a lambda vector or a dhdl derivative direction */
73 typedef struct lambda_vec_t
75 double *val; /* The lambda vector component values. Only valid if
77 int dhdl; /* The coordinate index for the derivative described by this
79 const lambda_components_t *lc; /* the associated lambda_components
81 int index; /* The state number (init-lambda-state) of this lambda
82 vector, if known. If not, it is set to -1 */
85 /* the dhdl.xvg data from a simulation */
89 int ftp; /* file type */
90 int nset; /* number of lambdas, including dhdl */
91 int *np; /* number of data points (du or hists) per lambda */
92 int np_alloc; /* number of points (du or hists) allocated */
93 double temp; /* temperature */
94 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
95 double *t; /* the times (of second index for y) */
96 double **y; /* the dU values. y[0] holds the derivative, while
97 further ones contain the energy differences between
98 the native lambda and the 'foreign' lambdas. */
99 lambda_vec_t native_lambda; /* the native lambda */
101 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
105 typedef struct hist_t
107 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
108 double dx[2]; /* the histogram spacing. The reverse
109 dx is the negative of the forward dx.*/
110 gmx_int64_t x0[2]; /* the (forward + reverse) histogram start
113 int nbin[2]; /* the (forward+reverse) number of bins */
114 gmx_int64_t sum; /* the total number of counts. Must be
115 the same for forward + reverse. */
116 int nhist; /* number of hist datas (forward or reverse) */
118 double start_time, delta_time; /* start time, end time of histogram */
122 /* an aggregate of samples for partial free energy calculation */
123 typedef struct samples_t
125 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
126 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
127 double temp; /* the temperature */
128 gmx_bool derivative; /* whether this sample is a derivative */
130 /* The samples come either as either delta U lists: */
131 int ndu; /* the number of delta U samples */
132 double *du; /* the delta u's */
133 double *t; /* the times associated with those samples, or: */
134 double start_time, delta_time; /*start time and delta time for linear time*/
136 /* or as histograms: */
137 hist_t *hist; /* a histogram */
139 /* allocation data: (not NULL for data 'owned' by this struct) */
140 double *du_alloc, *t_alloc; /* allocated delta u arrays */
141 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
142 hist_t *hist_alloc; /* allocated hist */
144 gmx_int64_t ntot; /* total number of samples */
145 const char *filename; /* the file name this sample comes from */
148 /* a sample range (start to end for du-style data, or boolean
149 for both du-style data and histograms */
150 typedef struct sample_range_t
152 int start, end; /* start and end index for du style data */
153 gmx_bool use; /* whether to use this sample */
155 samples_t *s; /* the samples this range belongs to */
159 /* a collection of samples for a partial free energy calculation
160 (i.e. the collection of samples from one native lambda to one
162 typedef struct sample_coll_t
164 lambda_vec_t *native_lambda; /* these should be the same for all samples
166 lambda_vec_t *foreign_lambda; /* collection */
167 double temp; /* the temperature */
169 int nsamples; /* the number of samples */
170 samples_t **s; /* the samples themselves */
171 sample_range_t *r; /* the sample ranges */
172 int nsamples_alloc; /* number of allocated samples */
174 gmx_int64_t ntot; /* total number of samples in the ranges of
177 struct sample_coll_t *next, *prev; /* next and previous in the list */
180 /* all the samples associated with a lambda point */
181 typedef struct lambda_data_t
183 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
184 double temp; /* temperature */
186 sample_coll_t *sc; /* the samples */
188 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
190 struct lambda_data_t *next, *prev; /* the next and prev in the list */
193 /* Top-level data structure of simulation data */
194 typedef struct sim_data_t
196 lambda_data_t *lb; /* a lambda data linked list */
197 lambda_data_t lb_head; /* The head element of the linked list */
199 lambda_components_t lc; /* the allowed components of the lambda
203 /* Top-level data structure with calculated values. */
205 sample_coll_t *a, *b; /* the simulation data */
207 double dg; /* the free energy difference */
208 double dg_err; /* the free energy difference */
210 double dg_disc_err; /* discretization error */
211 double dg_histrange_err; /* histogram range error */
213 double sa; /* relative entropy of b in state a */
214 double sa_err; /* error in sa */
215 double sb; /* relative entropy of a in state b */
216 double sb_err; /* error in sb */
218 double dg_stddev; /* expected dg stddev per sample */
219 double dg_stddev_err; /* error in dg_stddev */
223 /* Initialize a lambda_components structure */
224 static void lambda_components_init(lambda_components_t *lc)
228 snew(lc->names, lc->Nalloc);
231 /* Add a component to a lambda_components structure */
232 static void lambda_components_add(lambda_components_t *lc,
233 const char *name, size_t name_length)
235 while (lc->N + 1 > lc->Nalloc)
237 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
238 srenew(lc->names, lc->Nalloc);
240 snew(lc->names[lc->N], name_length+1);
241 std::strncpy(lc->names[lc->N], name, name_length);
245 /* check whether a component with index 'index' matches the given name, or
246 is also NULL. Returns TRUE if this is the case.
247 the string name does not need to end */
248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
254 if (!lc || index >= lc->N)
258 if (name == nullptr && lc->names[index] == nullptr)
262 if ((name == nullptr) != (lc->names[index] == nullptr))
266 len = std::strlen(lc->names[index]);
267 if (len != name_length)
271 return std::strncmp(lc->names[index], name, name_length) == 0;
274 /* Find the index of a given lambda component name, or -1 if not found */
275 static int lambda_components_find(const lambda_components_t *lc,
281 for (i = 0; i < lc->N; i++)
283 if (std::strncmp(lc->names[i], name, name_length) == 0)
293 /* initialize a lambda vector */
294 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
296 snew(lv->val, lc->N);
302 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
306 lambda_vec_init(lv, orig->lc);
307 lv->dhdl = orig->dhdl;
308 lv->index = orig->index;
309 for (i = 0; i < lv->lc->N; i++)
311 lv->val[i] = orig->val[i];
315 /* write a lambda vec to a preallocated string */
316 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
320 str[0] = 0; /* reset the string */
325 str += sprintf(str, "delta H to ");
329 str += sprintf(str, "(");
331 for (i = 0; i < lv->lc->N; i++)
333 str += sprintf(str, "%g", lv->val[i]);
336 str += sprintf(str, ", ");
346 /* this lambda vector describes a derivative */
347 str += sprintf(str, "dH/dl");
348 if (std::strlen(lv->lc->names[lv->dhdl]) > 0)
350 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
355 /* write a shortened version of the lambda vec to a preallocated string */
356 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
360 sprintf(str, "%6d", lv->index);
366 sprintf(str, "%6.3f", lv->val[0]);
370 sprintf(str, "dH/dl[%d]", lv->dhdl);
375 /* write an intermediate version of two lambda vecs to a preallocated string */
376 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
377 const lambda_vec_t *b, char *str)
380 if ( (a->index >= 0) && (b->index >= 0) )
382 sprintf(str, "%6.3f", (a->index+b->index)/2.0);
386 if ( (a->dhdl < 0) && (b->dhdl < 0) )
388 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.0);
394 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
395 a and b must describe non-derivative lambda points */
396 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
401 if ( (a->dhdl > 0) || (b->dhdl > 0) )
404 "Trying to calculate the difference between derivatives instead of lambda points");
409 "Trying to calculate the difference lambdas with differing basis set");
411 for (i = 0; i < a->lc->N; i++)
413 double df = a->val[i] - b->val[i];
416 return std::sqrt(ret);
420 /* check whether two lambda vectors are the same */
421 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
431 for (i = 0; i < a->lc->N; i++)
433 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
442 /* they're derivatives, so we check whether the indices match */
443 return (a->dhdl == b->dhdl);
447 /* Compare the sort order of two foreign lambda vectors
449 returns 1 if a is 'bigger' than b,
450 returns 0 if they're the same,
451 returns -1 if a is 'smaller' than b.*/
452 static int lambda_vec_cmp_foreign(const lambda_vec_t *a,
453 const lambda_vec_t *b)
456 double norm_a = 0, norm_b = 0;
457 gmx_bool different = FALSE;
461 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
463 /* if either one has an index we sort based on that */
464 if ((a->index >= 0) || (b->index >= 0))
466 if (a->index == b->index)
470 return (a->index > b->index) ? 1 : -1;
472 if (a->dhdl >= 0 || b->dhdl >= 0)
474 /* lambda vectors that are derivatives always sort higher than those
475 without derivatives */
476 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
478 return (a->dhdl >= 0) ? 1 : -1;
480 return a->dhdl > b->dhdl;
483 /* neither has an index, so we can only sort on the lambda components,
484 which is only valid if there is one component */
485 for (i = 0; i < a->lc->N; i++)
487 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
491 norm_a += a->val[i]*a->val[i];
492 norm_b += b->val[i]*b->val[i];
498 return norm_a > norm_b;
501 /* Compare the sort order of two native lambda vectors
503 returns 1 if a is 'bigger' than b,
504 returns 0 if they're the same,
505 returns -1 if a is 'smaller' than b.*/
506 static int lambda_vec_cmp_native(const lambda_vec_t *a,
507 const lambda_vec_t *b)
511 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
513 /* if either one has an index we sort based on that */
514 if ((a->index >= 0) || (b->index >= 0))
516 if (a->index == b->index)
520 return (a->index > b->index) ? 1 : -1;
522 /* neither has an index, so we can only sort on the lambda components,
523 which is only valid if there is one component */
527 "Can't compare lambdas with no index and > 1 component");
529 if (a->dhdl >= 0 || b->dhdl >= 0)
532 "Can't compare native lambdas that are derivatives");
534 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
538 return a->val[0] > b->val[0] ? 1 : -1;
544 static void hist_init(hist_t *h, int nhist, int *nbin)
549 gmx_fatal(FARGS, "histogram with more than two sets of data!");
551 for (i = 0; i < nhist; i++)
553 snew(h->bin[i], nbin[i]);
555 h->nbin[i] = nbin[i];
556 h->start_time = h->delta_time = 0;
563 static void xvg_init(xvg_t *ba)
565 ba->filename = nullptr;
572 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
573 lambda_vec_t *foreign_lambda, double temp,
574 gmx_bool derivative, const char *filename)
576 s->native_lambda = native_lambda;
577 s->foreign_lambda = foreign_lambda;
579 s->derivative = derivative;
584 s->start_time = s->delta_time = 0;
586 s->du_alloc = nullptr;
587 s->t_alloc = nullptr;
588 s->hist_alloc = nullptr;
593 s->filename = filename;
596 static void sample_range_init(sample_range_t *r, samples_t *s)
604 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
605 lambda_vec_t *foreign_lambda, double temp)
607 sc->native_lambda = native_lambda;
608 sc->foreign_lambda = foreign_lambda;
614 sc->nsamples_alloc = 0;
617 sc->next = sc->prev = nullptr;
620 static void sample_coll_destroy(sample_coll_t *sc)
622 /* don't free the samples themselves */
628 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
631 l->lambda = native_lambda;
637 l->sc = &(l->sc_head);
639 sample_coll_init(l->sc, native_lambda, nullptr, 0.);
644 static void barres_init(barres_t *br)
653 br->dg_stddev_err = 0;
660 /* calculate the total number of samples in a sample collection */
661 static void sample_coll_calc_ntot(sample_coll_t *sc)
666 for (i = 0; i < sc->nsamples; i++)
672 sc->ntot += sc->s[i]->ntot;
676 sc->ntot += sc->r[i].end - sc->r[i].start;
683 /* find the barsamples_t associated with a lambda that corresponds to
684 a specific foreign lambda */
685 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
686 lambda_vec_t *foreign_lambda)
688 sample_coll_t *sc = l->sc->next;
692 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
702 /* insert li into an ordered list of lambda_colls */
703 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
705 sample_coll_t *scn = l->sc->next;
706 while ( (scn != l->sc) )
708 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
714 /* now insert it before the found scn */
716 sc->prev = scn->prev;
717 scn->prev->next = sc;
721 /* insert li into an ordered list of lambdas */
722 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
724 lambda_data_t *lc = head->next;
727 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
733 /* now insert ourselves before the found lc */
740 /* insert a sample and a sample_range into a sample_coll. The
741 samples are stored as a pointer, the range is copied. */
742 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
745 /* first check if it belongs here */
746 GMX_ASSERT(sc->next->s, "Next not properly initialized!");
747 if (sc->temp != s->temp)
749 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
750 s->filename, sc->next->s[0]->filename);
752 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
754 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
755 s->filename, sc->next->s[0]->filename);
757 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
759 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
760 s->filename, sc->next->s[0]->filename);
763 /* check if there's room */
764 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
766 sc->nsamples_alloc = std::max(2*sc->nsamples_alloc, 2);
767 srenew(sc->s, sc->nsamples_alloc);
768 srenew(sc->r, sc->nsamples_alloc);
770 sc->s[sc->nsamples] = s;
771 sc->r[sc->nsamples] = *r;
774 sample_coll_calc_ntot(sc);
777 /* insert a sample into a lambda_list, creating the right sample_coll if
779 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
781 gmx_bool found = FALSE;
785 lambda_data_t *l = head->next;
787 /* first search for the right lambda_data_t */
790 if (lambda_vec_same(l->lambda, s->native_lambda) )
800 snew(l, 1); /* allocate a new one */
801 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
802 lambda_data_insert_lambda(head, l); /* add it to the list */
805 /* now look for a sample collection */
806 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
809 snew(sc, 1); /* allocate a new one */
810 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
811 lambda_data_insert_sample_coll(l, sc);
814 /* now insert the samples into the sample coll */
815 sample_range_init(&r, s);
816 sample_coll_insert_sample(sc, s, &r);
820 /* make a histogram out of a sample collection */
821 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
822 int *nbin_alloc, int *nbin,
823 double *dx, double *xmin, int nbin_default)
826 gmx_bool dx_set = FALSE;
827 gmx_bool xmin_set = FALSE;
829 gmx_bool xmax_set = FALSE;
830 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
831 limits of a histogram */
834 /* first determine dx and xmin; try the histograms */
835 for (i = 0; i < sc->nsamples; i++)
839 hist_t *hist = sc->s[i]->hist;
840 for (k = 0; k < hist->nhist; k++)
842 double hdx = hist->dx[k];
843 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
845 /* we use the biggest dx*/
846 if ( (!dx_set) || hist->dx[0] > *dx)
851 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
854 *xmin = (hist->x0[k]*hdx);
857 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
861 if (hist->bin[k][hist->nbin[k]-1] != 0)
863 xmax_set_hard = TRUE;
866 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
868 xmax_set_hard = TRUE;
874 /* and the delta us */
875 for (i = 0; i < sc->nsamples; i++)
877 if (sc->s[i]->ndu > 0)
879 /* determine min and max */
880 int starti = sc->r[i].start;
881 int endi = sc->r[i].end;
882 double du_xmin = sc->s[i]->du[starti];
883 double du_xmax = sc->s[i]->du[starti];
884 for (j = starti+1; j < endi; j++)
886 if (sc->s[i]->du[j] < du_xmin)
888 du_xmin = sc->s[i]->du[j];
890 if (sc->s[i]->du[j] > du_xmax)
892 du_xmax = sc->s[i]->du[j];
896 /* and now change the limits */
897 if ( (!xmin_set) || (du_xmin < *xmin) )
902 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
910 if (!xmax_set || !xmin_set)
919 *nbin = nbin_default;
920 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
921 be 0, and we count from 0 */
925 *nbin = static_cast<int>((xmax-(*xmin))/(*dx));
928 if (*nbin > *nbin_alloc)
931 srenew(*bin, *nbin_alloc);
934 /* reset the histogram */
935 for (i = 0; i < (*nbin); i++)
940 /* now add the actual data */
941 for (i = 0; i < sc->nsamples; i++)
945 hist_t *hist = sc->s[i]->hist;
946 for (k = 0; k < hist->nhist; k++)
948 double hdx = hist->dx[k];
949 double xmin_hist = hist->x0[k]*hdx;
950 for (j = 0; j < hist->nbin[k]; j++)
952 /* calculate the bin corresponding to the middle of the
954 double x = hdx*(j+0.5) + xmin_hist;
955 int binnr = static_cast<int>((x-(*xmin))/(*dx));
957 if (binnr >= *nbin || binnr < 0)
962 (*bin)[binnr] += hist->bin[k][j];
968 int starti = sc->r[i].start;
969 int endi = sc->r[i].end;
970 for (j = starti; j < endi; j++)
972 int binnr = static_cast<int>((sc->s[i]->du[j]-(*xmin))/(*dx));
973 if (binnr >= *nbin || binnr < 0)
984 /* write a collection of histograms to a file */
985 static void sim_data_histogram(sim_data_t *sd, const char *filename,
986 int nbin_default, const gmx_output_env_t *oenv)
988 char label_x[STRLEN];
989 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
990 const char *title = "N(\\DeltaH)";
991 const char *label_y = "Samples";
995 char **setnames = nullptr;
996 gmx_bool first_set = FALSE;
997 /* histogram data: */
1004 lambda_data_t *bl_head = sd->lb;
1006 printf("\nWriting histogram to %s\n", filename);
1007 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1009 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1011 /* first get all the set names */
1013 /* iterate over all lambdas */
1014 while (bl != bl_head)
1016 sample_coll_t *sc = bl->sc->next;
1018 /* iterate over all samples */
1019 while (sc != bl->sc)
1021 char buf[STRLEN], buf2[STRLEN];
1024 srenew(setnames, nsets);
1025 snew(setnames[nsets-1], STRLEN);
1026 if (sc->foreign_lambda->dhdl < 0)
1028 lambda_vec_print(sc->native_lambda, buf, FALSE);
1029 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1030 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1031 deltag, lambda, buf2, lambda, buf);
1035 lambda_vec_print(sc->native_lambda, buf, FALSE);
1036 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1044 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1047 /* now make the histograms */
1049 /* iterate over all lambdas */
1050 while (bl != bl_head)
1052 sample_coll_t *sc = bl->sc->next;
1054 /* iterate over all samples */
1055 while (sc != bl->sc)
1059 xvgr_new_dataset(fp, 0, 0, nullptr, oenv);
1062 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &minval,
1065 for (i = 0; i < nbin; i++)
1067 double xmin = i*dx + minval;
1068 double xmax = (i+1)*dx + minval;
1070 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1089 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1093 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1094 if (lambda->index >= 0)
1096 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1098 if (lambda->dhdl >= 0)
1100 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1105 for (i = 0; i < lambda->lc->N; i++)
1107 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1113 /* create a collection (array) of barres_t object given a ordered linked list
1114 of barlamda_t sample collections */
1115 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1121 gmx_bool dhdl = FALSE;
1122 gmx_bool first = TRUE;
1123 lambda_data_t *bl_head = sd->lb;
1125 /* first count the lambdas */
1127 while (bl != bl_head)
1132 snew(res, nlambda-1);
1134 /* next put the right samples in the res */
1136 bl = bl_head->next->next; /* we start with the second one. */
1137 while (bl != bl_head)
1139 sample_coll_t *sc, *scprev;
1140 barres_t *br = &(res[*nres]);
1141 /* there is always a previous one. we search for that as a foreign
1143 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1144 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1152 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1153 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1157 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1162 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1165 else if (!scprev && !sc)
1167 char descX[STRLEN], descY[STRLEN];
1168 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1169 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1171 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1174 /* normal delta H */
1177 char descX[STRLEN], descY[STRLEN];
1178 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1179 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1180 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1184 char descX[STRLEN], descY[STRLEN];
1185 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1186 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1187 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1199 /* estimate the maximum discretization error */
1200 static double barres_list_max_disc_err(barres_t *res, int nres)
1203 double disc_err = 0.;
1204 double delta_lambda;
1206 for (i = 0; i < nres; i++)
1208 barres_t *br = &(res[i]);
1210 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1211 br->a->native_lambda);
1213 for (j = 0; j < br->a->nsamples; j++)
1215 if (br->a->s[j]->hist)
1218 if (br->a->s[j]->derivative)
1220 Wfac = delta_lambda;
1223 disc_err = std::max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1226 for (j = 0; j < br->b->nsamples; j++)
1228 if (br->b->s[j]->hist)
1231 if (br->b->s[j]->derivative)
1233 Wfac = delta_lambda;
1235 disc_err = std::max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1243 /* impose start and end times on a sample collection, updating sample_ranges */
1244 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1248 for (i = 0; i < sc->nsamples; i++)
1250 samples_t *s = sc->s[i];
1251 sample_range_t *r = &(sc->r[i]);
1254 double end_time = s->hist->delta_time*s->hist->sum +
1255 s->hist->start_time;
1256 if (s->hist->start_time < begin_t || end_time > end_t)
1266 if (s->start_time < begin_t)
1268 r->start = static_cast<int>((begin_t - s->start_time)/s->delta_time);
1270 end_time = s->delta_time*s->ndu + s->start_time;
1271 if (end_time > end_t)
1273 r->end = static_cast<int>((end_t - s->start_time)/s->delta_time);
1279 for (j = 0; j < s->ndu; j++)
1281 if (s->t[j] < begin_t)
1286 if (s->t[j] >= end_t)
1293 if (r->start > r->end)
1299 sample_coll_calc_ntot(sc);
1302 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1304 double first_t, last_t;
1305 double begin_t, end_t;
1307 lambda_data_t *head = sd->lb;
1310 if (begin <= 0 && end < 0)
1315 /* first determine the global start and end times */
1321 sample_coll_t *sc = lc->sc->next;
1322 while (sc != lc->sc)
1324 for (j = 0; j < sc->nsamples; j++)
1326 double start_t, end_t;
1328 start_t = sc->s[j]->start_time;
1329 end_t = sc->s[j]->start_time;
1332 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1338 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1342 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1346 if (start_t < first_t || first_t < 0)
1360 /* calculate the actual times */
1378 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1380 if (begin_t > end_t)
1384 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1386 /* then impose them */
1390 sample_coll_t *sc = lc->sc->next;
1391 while (sc != lc->sc)
1393 sample_coll_impose_times(sc, begin_t, end_t);
1401 /* create subsample i out of ni from an existing sample_coll */
1402 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1403 sample_coll_t *sc_orig,
1408 gmx_int64_t ntot_start;
1409 gmx_int64_t ntot_end;
1410 gmx_int64_t ntot_so_far;
1412 *sc = *sc_orig; /* just copy all fields */
1414 /* allocate proprietary memory */
1415 snew(sc->s, sc_orig->nsamples);
1416 snew(sc->r, sc_orig->nsamples);
1418 /* copy the samples */
1419 for (j = 0; j < sc_orig->nsamples; j++)
1421 sc->s[j] = sc_orig->s[j];
1422 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1425 /* now fix start and end fields */
1426 /* the casts avoid possible overflows */
1427 ntot_start = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i)/static_cast<double>(ni));
1428 ntot_end = static_cast<gmx_int64_t>(sc_orig->ntot*static_cast<double>(i+1)/static_cast<double>(ni));
1430 for (j = 0; j < sc->nsamples; j++)
1432 gmx_int64_t ntot_add;
1433 gmx_int64_t new_start, new_end;
1439 ntot_add = sc->s[j]->hist->sum;
1443 ntot_add = sc->r[j].end - sc->r[j].start;
1451 if (!sc->s[j]->hist)
1453 if (ntot_so_far < ntot_start)
1455 /* adjust starting point */
1456 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1460 new_start = sc->r[j].start;
1462 /* adjust end point */
1463 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1464 if (new_end > sc->r[j].end)
1466 new_end = sc->r[j].end;
1469 /* check if we're in range at all */
1470 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1475 /* and write the new range */
1476 GMX_RELEASE_ASSERT(new_start <= std::numeric_limits<int>::max(), "Value of 'new_start' too large for int converstion");
1477 GMX_RELEASE_ASSERT(new_end <= std::numeric_limits<int>::max(), "Value of 'new_end' too large for int converstion");
1478 sc->r[j].start = static_cast<int>(new_start);
1479 sc->r[j].end = static_cast<int>(new_end);
1486 double ntot_start_norm, ntot_end_norm;
1487 /* calculate the amount of overlap of the
1488 desired range (ntot_start -- ntot_end) onto
1489 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1491 /* first calculate normalized bounds
1492 (where 0 is the start of the hist range, and 1 the end) */
1493 ntot_start_norm = (ntot_start-ntot_so_far)/static_cast<double>(ntot_add);
1494 ntot_end_norm = (ntot_end-ntot_so_far)/static_cast<double>(ntot_add);
1496 /* now fix the boundaries */
1497 ntot_start_norm = std::min(1.0, std::max(0.0, ntot_start_norm));
1498 ntot_end_norm = std::max(0.0, std::min(1.0, ntot_end_norm));
1500 /* and calculate the overlap */
1501 overlap = ntot_end_norm - ntot_start_norm;
1503 if (overlap > 0.95) /* we allow for 5% slack */
1505 sc->r[j].use = TRUE;
1507 else if (overlap < 0.05)
1509 sc->r[j].use = FALSE;
1517 ntot_so_far += ntot_add;
1519 sample_coll_calc_ntot(sc);
1524 /* calculate minimum and maximum work values in sample collection */
1525 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1526 double *Wmin, double *Wmax)
1530 *Wmin = std::numeric_limits<float>::max();
1531 *Wmax = -std::numeric_limits<float>::max();
1533 for (i = 0; i < sc->nsamples; i++)
1535 samples_t *s = sc->s[i];
1536 sample_range_t *r = &(sc->r[i]);
1541 for (j = r->start; j < r->end; j++)
1543 *Wmin = std::min(*Wmin, s->du[j]*Wfac);
1544 *Wmax = std::max(*Wmax, s->du[j]*Wfac);
1549 int hd = 0; /* determine the histogram direction: */
1551 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1555 dx = s->hist->dx[hd];
1557 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1559 *Wmin = std::min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1560 *Wmax = std::max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1561 /* look for the highest value bin with values */
1562 if (s->hist->bin[hd][j] > 0)
1564 *Wmin = std::min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1565 *Wmax = std::max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1574 /* Initialize a sim_data structure */
1575 static void sim_data_init(sim_data_t *sd)
1577 /* make linked list */
1578 sd->lb = &(sd->lb_head);
1579 sd->lb->next = sd->lb;
1580 sd->lb->prev = sd->lb;
1582 lambda_components_init(&(sd->lc));
1586 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1593 for (i = 0; i < n; i++)
1595 sum += 1./(1. + std::exp(Wfac*W[i] + sbMmDG));
1601 /* calculate the BAR average given a histogram
1603 if type== 0, calculate the best estimate for the average,
1604 if type==-1, calculate the minimum possible value given the histogram
1605 if type== 1, calculate the maximum possible value given the histogram */
1606 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1612 /* normalization factor multiplied with bin width and
1613 number of samples (we normalize through M): */
1615 int hd = 0; /* determine the histogram direction: */
1618 if ( (hist->nhist > 1) && (Wfac < 0) )
1623 maxbin = hist->nbin[hd]-1;
1626 maxbin = hist->nbin[hd]; /* we also add whatever was out of range */
1629 for (i = 0; i < maxbin; i++)
1631 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1632 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1634 sum += pxdx/(1. + std::exp(x + sbMmDG));
1640 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1641 double temp, double tol, int type)
1645 double Wfac1, Wfac2, Wmin, Wmax;
1646 double DG0, DG1, DG2, dDG1;
1647 double n1, n2; /* numbers of samples as doubles */
1652 /* count the numbers of samples */
1656 M = std::log(n1/n2);
1658 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1659 if (ca->foreign_lambda->dhdl < 0)
1661 /* this is the case when the delta U were calculated directly
1662 (i.e. we're not scaling dhdl) */
1668 /* we're using dhdl, so delta_lambda needs to be a
1669 multiplication factor. */
1670 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1671 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1673 if (cb->native_lambda->lc->N > 1)
1676 "Can't (yet) do multi-component dhdl interpolation");
1679 Wfac1 = beta*delta_lambda;
1680 Wfac2 = -beta*delta_lambda;
1685 /* We print the output both in kT and kJ/mol.
1686 * Here we determine DG in kT, so when beta < 1
1687 * the precision has to be increased.
1692 /* Calculate minimum and maximum work to give an initial estimate of
1693 * delta G as their average.
1696 double Wmin1, Wmin2, Wmax1, Wmax2;
1697 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1698 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1700 Wmin = std::min(Wmin1, Wmin2);
1701 Wmax = std::max(Wmax1, Wmax2);
1709 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1711 /* We approximate by bisection: given our initial estimates
1712 we keep checking whether the halfway point is greater or
1713 smaller than what we get out of the BAR averages.
1715 For the comparison we can use twice the tolerance. */
1716 while (DG2 - DG0 > 2*tol)
1718 DG1 = 0.5*(DG0 + DG2);
1720 /* calculate the BAR averages */
1723 for (i = 0; i < ca->nsamples; i++)
1725 samples_t *s = ca->s[i];
1726 sample_range_t *r = &(ca->r[i]);
1731 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1735 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1740 for (i = 0; i < cb->nsamples; i++)
1742 samples_t *s = cb->s[i];
1743 sample_range_t *r = &(cb->r[i]);
1748 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1752 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1768 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1772 return 0.5*(DG0 + DG2);
1775 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1776 double temp, double dg, double *sa, double *sb)
1782 double Wfac1, Wfac2;
1788 /* count the numbers of samples */
1792 /* to ensure the work values are the same as during the delta_G */
1793 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1794 if (ca->foreign_lambda->dhdl < 0)
1796 /* this is the case when the delta U were calculated directly
1797 (i.e. we're not scaling dhdl) */
1803 /* we're using dhdl, so delta_lambda needs to be a
1804 multiplication factor. */
1805 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1807 Wfac1 = beta*delta_lambda;
1808 Wfac2 = -beta*delta_lambda;
1811 /* first calculate the average work in both directions */
1812 for (i = 0; i < ca->nsamples; i++)
1814 samples_t *s = ca->s[i];
1815 sample_range_t *r = &(ca->r[i]);
1820 for (j = r->start; j < r->end; j++)
1822 W_ab += Wfac1*s->du[j];
1827 /* normalization factor multiplied with bin width and
1828 number of samples (we normalize through M): */
1831 int hd = 0; /* histogram direction */
1832 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1836 dx = s->hist->dx[hd];
1838 for (j = 0; j < s->hist->nbin[0]; j++)
1840 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1841 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1849 for (i = 0; i < cb->nsamples; i++)
1851 samples_t *s = cb->s[i];
1852 sample_range_t *r = &(cb->r[i]);
1857 for (j = r->start; j < r->end; j++)
1859 W_ba += Wfac1*s->du[j];
1864 /* normalization factor multiplied with bin width and
1865 number of samples (we normalize through M): */
1868 int hd = 0; /* histogram direction */
1869 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1873 dx = s->hist->dx[hd];
1875 for (j = 0; j < s->hist->nbin[0]; j++)
1877 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1878 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1886 /* then calculate the relative entropies */
1891 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1892 double temp, double dg, double *stddev)
1896 double sigmafact = 0.;
1898 double Wfac1, Wfac2;
1904 /* count the numbers of samples */
1908 /* to ensure the work values are the same as during the delta_G */
1909 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1910 if (ca->foreign_lambda->dhdl < 0)
1912 /* this is the case when the delta U were calculated directly
1913 (i.e. we're not scaling dhdl) */
1919 /* we're using dhdl, so delta_lambda needs to be a
1920 multiplication factor. */
1921 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1923 Wfac1 = beta*delta_lambda;
1924 Wfac2 = -beta*delta_lambda;
1927 M = std::log(n1/n2);
1930 /* calculate average in both directions */
1931 for (i = 0; i < ca->nsamples; i++)
1933 samples_t *s = ca->s[i];
1934 sample_range_t *r = &(ca->r[i]);
1939 for (j = r->start; j < r->end; j++)
1941 sigmafact += 1./(2. + 2.*std::cosh((M + Wfac1*s->du[j] - dg)));
1946 /* normalization factor multiplied with bin width and
1947 number of samples (we normalize through M): */
1950 int hd = 0; /* histogram direction */
1951 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1955 dx = s->hist->dx[hd];
1957 for (j = 0; j < s->hist->nbin[0]; j++)
1959 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1960 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1962 sigmafact += pxdx/(2. + 2.*std::cosh((M + x - dg)));
1967 for (i = 0; i < cb->nsamples; i++)
1969 samples_t *s = cb->s[i];
1970 sample_range_t *r = &(cb->r[i]);
1975 for (j = r->start; j < r->end; j++)
1977 sigmafact += 1./(2. + 2.*std::cosh((M - Wfac2*s->du[j] - dg)));
1982 /* normalization factor multiplied with bin width and
1983 number of samples (we normalize through M): */
1986 int hd = 0; /* histogram direction */
1987 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1991 dx = s->hist->dx[hd];
1993 for (j = 0; j < s->hist->nbin[0]; j++)
1995 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1996 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1998 sigmafact += pxdx/(2. + 2.*std::cosh((M - x - dg)));
2004 sigmafact /= (n1 + n2);
2008 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2009 *stddev = std::sqrt(((1.0/sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2014 static void calc_bar(barres_t *br, double tol,
2015 int npee_min, int npee_max, gmx_bool *bEE,
2019 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2020 for calculated quantities */
2021 double temp = br->a->temp;
2023 double dg_min, dg_max;
2024 gmx_bool have_hist = FALSE;
2026 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2028 br->dg_disc_err = 0.;
2029 br->dg_histrange_err = 0.;
2031 /* check if there are histograms */
2032 for (i = 0; i < br->a->nsamples; i++)
2034 if (br->a->r[i].use && br->a->s[i]->hist)
2042 for (i = 0; i < br->b->nsamples; i++)
2044 if (br->b->r[i].use && br->b->s[i]->hist)
2052 /* calculate histogram-specific errors */
2055 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2056 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2058 if (std::abs(dg_max - dg_min) > GMX_REAL_EPS*10)
2060 /* the histogram range error is the biggest of the differences
2061 between the best estimate and the extremes */
2062 br->dg_histrange_err = std::abs(dg_max - dg_min);
2064 br->dg_disc_err = 0.;
2065 for (i = 0; i < br->a->nsamples; i++)
2067 if (br->a->s[i]->hist)
2069 br->dg_disc_err = std::max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2072 for (i = 0; i < br->b->nsamples; i++)
2074 if (br->b->s[i]->hist)
2076 br->dg_disc_err = std::max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2080 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2082 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2091 sample_coll_t ca, cb;
2093 /* initialize the samples */
2094 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2096 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2099 for (npee = npee_min; npee <= npee_max; npee++)
2108 double dstddev2 = 0;
2111 for (p = 0; p < npee; p++)
2118 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2119 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2123 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2127 sample_coll_destroy(&ca);
2131 sample_coll_destroy(&cb);
2136 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2140 partsum[npee*(npee_max+1)+p] += dgp;
2142 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2147 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2150 dstddev2 += stddevc*stddevc;
2152 sample_coll_destroy(&ca);
2153 sample_coll_destroy(&cb);
2157 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2163 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2164 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2168 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2170 br->dg_err = std::sqrt(dg_sig2/(npee_max - npee_min + 1));
2171 br->sa_err = std::sqrt(sa_sig2/(npee_max - npee_min + 1));
2172 br->sb_err = std::sqrt(sb_sig2/(npee_max - npee_min + 1));
2173 br->dg_stddev_err = std::sqrt(stddev_sig2/(npee_max - npee_min + 1));
2178 static double bar_err(int nbmin, int nbmax, const double *partsum)
2181 double svar, s, s2, dg;
2184 for (nb = nbmin; nb <= nbmax; nb++)
2188 for (b = 0; b < nb; b++)
2190 dg = partsum[nb*(nbmax+1)+b];
2196 svar += (s2 - s*s)/(nb - 1);
2199 return std::sqrt(svar/(nbmax + 1 - nbmin));
2203 /* Seek the end of an identifier (consecutive non-spaces), followed by
2204 an optional number of spaces or '='-signs. Returns a pointer to the
2205 first non-space value found after that. Returns NULL if the string
2208 static const char *find_value(const char *str)
2210 gmx_bool name_end_found = FALSE;
2212 /* if the string is a NULL pointer, return a NULL pointer. */
2217 while (*str != '\0')
2219 /* first find the end of the name */
2220 if (!name_end_found)
2222 if (std::isspace(*str) || (*str == '=') )
2224 name_end_found = TRUE;
2229 if (!( std::isspace(*str) || (*str == '=') ))
2240 /* read a vector-notation description of a lambda vector */
2241 static gmx_bool read_lambda_compvec(const char *str,
2243 const lambda_components_t *lc_in,
2244 lambda_components_t *lc_out,
2248 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2249 components, or to check them */
2250 gmx_bool start_reached = FALSE; /* whether the start of component names
2252 gmx_bool vector = FALSE; /* whether there are multiple components */
2253 int n = 0; /* current component number */
2254 const char *val_start = nullptr; /* start of the component name, or NULL
2255 if not in a value */
2265 if (lc_out && lc_out->N == 0)
2267 initialize_lc = TRUE;
2270 if (lc_in == nullptr)
2279 if (std::isalnum(*str))
2282 start_reached = TRUE;
2285 else if (*str == '(')
2288 start_reached = TRUE;
2290 else if (!std::isspace(*str))
2292 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2299 if (std::isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2306 lambda_components_add(lc_out, val_start,
2311 if (!lambda_components_check(lc_out, n, val_start,
2320 /* add a vector component to lv */
2321 lv->val[n] = strtod(val_start, &strtod_end);
2322 if (val_start == strtod_end)
2325 "Error reading lambda vector in %s", fn);
2328 /* reset for the next identifier */
2329 val_start = nullptr;
2337 else if (std::isalnum(*str))
2350 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2354 GMX_RELEASE_ASSERT(lc_in != nullptr, "Internal inconsistency? lc_in==NULL");
2359 else if (lv == nullptr)
2365 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2385 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2391 /* read and check the component names from a string */
2392 static gmx_bool read_lambda_components(const char *str,
2393 lambda_components_t *lc,
2397 return read_lambda_compvec(str, nullptr, nullptr, lc, end, fn);
2400 /* read an initialized lambda vector from a string */
2401 static gmx_bool read_lambda_vector(const char *str,
2406 return read_lambda_compvec(str, lv, lv->lc, nullptr, end, fn);
2411 /* deduce lambda value from legend.
2413 legend = the legend string
2415 lam = the initialized lambda vector
2416 returns whether to use the data in this set.
2418 static gmx_bool legend2lambda(const char *fn,
2422 const char *ptr = nullptr, *ptr2 = nullptr;
2423 gmx_bool ok = FALSE;
2424 gmx_bool bdhdl = FALSE;
2425 const char *tostr = " to ";
2427 if (legend == nullptr)
2429 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2432 /* look for the last 'to': */
2436 ptr2 = std::strstr(ptr2, tostr);
2437 if (ptr2 != nullptr)
2443 while (ptr2 != nullptr && *ptr2 != '\0');
2447 ptr += std::strlen(tostr)-1; /* and advance past that 'to' */
2451 /* look for the = sign */
2452 ptr = std::strrchr(legend, '=');
2455 /* otherwise look for the last space */
2456 ptr = std::strrchr(legend, ' ');
2460 if (std::strstr(legend, "dH"))
2465 else if (std::strchr(legend, 'D') != nullptr && std::strchr(legend, 'H') != nullptr)
2470 else /*if (std::strstr(legend, "pV"))*/
2481 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2485 ptr = find_value(ptr);
2486 if (!ptr || !read_lambda_vector(ptr, lam, nullptr, fn))
2488 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2496 ptr = std::strrchr(legend, '=');
2500 /* there must be a component name */
2504 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2506 /* now backtrack to the start of the identifier */
2507 while (isspace(*ptr))
2513 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2516 while (!std::isspace(*ptr))
2521 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2525 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2529 std::strncpy(buf, ptr, (end-ptr));
2530 buf[(end-ptr)] = '\0';
2532 "Did not find lambda component for '%s' in %s",
2541 "dhdl without component name with >1 lambda component in %s",
2546 lam->dhdl = dhdl_index;
2551 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2552 lambda_components_t *lc)
2557 double native_lambda;
2561 /* first check for a state string */
2562 ptr = std::strstr(subtitle, "state");
2566 const char *val_end;
2568 /* the new 4.6 style lambda vectors */
2569 ptr = find_value(ptr);
2572 index = std::strtol(ptr, &end, 10);
2575 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2582 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2585 /* now find the lambda vector component names */
2586 while (*ptr != '(' && !std::isalnum(*ptr))
2592 "Incomplete lambda vector component data in %s", fn);
2597 if (!read_lambda_components(ptr, lc, &val_end, fn))
2600 "lambda vector components in %s don't match those previously read",
2603 ptr = find_value(val_end);
2606 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2609 lambda_vec_init(&(ba->native_lambda), lc);
2610 if (!read_lambda_vector(ptr, &(ba->native_lambda), nullptr, fn))
2612 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2614 ba->native_lambda.index = index;
2619 /* compatibility mode: check for lambda in other ways. */
2620 /* plain text lambda string */
2621 ptr = std::strstr(subtitle, "lambda");
2624 /* xmgrace formatted lambda string */
2625 ptr = std::strstr(subtitle, "\\xl\\f{}");
2629 /* xmgr formatted lambda string */
2630 ptr = std::strstr(subtitle, "\\8l\\4");
2634 ptr = std::strstr(ptr, "=");
2638 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2639 /* add the lambda component name as an empty string */
2642 if (!lambda_components_check(lc, 0, "", 0))
2645 "lambda vector components in %s don't match those previously read",
2651 lambda_components_add(lc, "", 0);
2653 lambda_vec_init(&(ba->native_lambda), lc);
2654 ba->native_lambda.val[0] = native_lambda;
2661 static double filename2lambda(const char *fn)
2664 const char *ptr, *digitptr;
2668 /* go to the end of the path string and search backward to find the last
2669 directory in the path which has to contain the value of lambda
2671 while (ptr[1] != '\0')
2675 /* searching backward to find the second directory separator */
2680 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2688 /* save the last position of a digit between the last two
2689 separators = in the last dirname */
2690 if (dirsep > 0 && std::isdigit(*ptr))
2698 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2699 " last directory in the path '%s' does not contain a number", fn);
2701 if (digitptr[-1] == '-')
2705 lambda = strtod(digitptr, &endptr);
2706 if (endptr == digitptr)
2708 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2713 static void read_bar_xvg_lowlevel(const char *fn, const real *temp, xvg_t *ba,
2714 lambda_components_t *lc)
2717 char *subtitle, **legend, *ptr;
2719 gmx_bool native_lambda_read = FALSE;
2726 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2729 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2731 /* Reorder the data */
2733 for (i = 1; i < ba->nset; i++)
2735 ba->y[i-1] = ba->y[i];
2739 snew(ba->np, ba->nset);
2740 for (i = 0; i < ba->nset; i++)
2746 if (subtitle != nullptr)
2748 /* try to extract temperature */
2749 ptr = std::strstr(subtitle, "T =");
2753 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2757 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2767 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2772 /* Try to deduce lambda from the subtitle */
2775 if (subtitle2lambda(subtitle, ba, fn, lc))
2777 native_lambda_read = TRUE;
2780 snew(ba->lambda, ba->nset);
2781 if (legend == nullptr)
2783 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2786 if (!native_lambda_read)
2788 // Deduce lambda from the file name.
2789 // Updated the routine filename2lambda to actually return lambda
2790 // to avoid C++ warnings, but this usage does not seem to need it?
2792 filename2lambda(fn);
2793 native_lambda_read = TRUE;
2795 ba->lambda[0] = ba->native_lambda;
2799 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2804 for (i = 0; i < ba->nset; )
2806 /* Read lambda from the legend */
2807 lambda_vec_init( &(ba->lambda[i]), lc );
2808 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2809 gmx_bool use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2812 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2818 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2819 for (j = i+1; j < ba->nset; j++)
2821 ba->y[j-1] = ba->y[j];
2822 legend[j-1] = legend[j];
2829 if (!native_lambda_read)
2831 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2834 if (legend != nullptr)
2836 for (i = 0; i < ba->nset-1; i++)
2844 static void read_bar_xvg(const char *fn, real *temp, sim_data_t *sd)
2852 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2854 if (barsim->nset < 1)
2856 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2859 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2861 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2863 *temp = barsim->temp;
2865 /* now create a series of samples_t */
2866 snew(s, barsim->nset);
2867 for (i = 0; i < barsim->nset; i++)
2869 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2870 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2871 &(barsim->lambda[i])),
2873 s[i].du = barsim->y[i];
2874 s[i].ndu = barsim->np[i];
2877 lambda_data_list_insert_sample(sd->lb, s+i);
2882 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2883 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2884 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2885 for (i = 0; i < barsim->nset; i++)
2887 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2888 printf(" %s (%d pts)\n", buf, s[i].ndu);
2894 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2895 double start_time, double delta_time,
2896 lambda_vec_t *native_lambda, double temp,
2897 double *last_t, const char *filename)
2900 lambda_vec_t *foreign_lambda;
2902 samples_t *s; /* convenience pointer */
2905 /* check the block types etc. */
2906 if ( (blk->nsub < 3) ||
2907 (blk->sub[0].type != xdr_datatype_int) ||
2908 (blk->sub[1].type != xdr_datatype_double) ||
2910 (blk->sub[2].type != xdr_datatype_float) &&
2911 (blk->sub[2].type != xdr_datatype_double)
2913 (blk->sub[0].nr < 1) ||
2914 (blk->sub[1].nr < 1) )
2917 "Unexpected/corrupted block data in file %s around time %f.",
2918 filename, start_time);
2921 snew(foreign_lambda, 1);
2922 lambda_vec_init(foreign_lambda, native_lambda->lc);
2923 lambda_vec_copy(foreign_lambda, native_lambda);
2924 type = blk->sub[0].ival[0];
2927 for (i = 0; i < native_lambda->lc->N; i++)
2929 foreign_lambda->val[i] = blk->sub[1].dval[i];
2934 if (blk->sub[0].nr > 1)
2936 foreign_lambda->dhdl = blk->sub[0].ival[1];
2940 foreign_lambda->dhdl = 0;
2946 /* initialize the samples structure if it's empty. */
2948 samples_init(*smp, native_lambda, foreign_lambda, temp,
2949 type == dhbtDHDL, filename);
2950 (*smp)->start_time = start_time;
2951 (*smp)->delta_time = delta_time;
2954 /* set convenience pointer */
2957 /* now double check */
2958 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2960 char buf[STRLEN], buf2[STRLEN];
2961 lambda_vec_print(foreign_lambda, buf, FALSE);
2962 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2963 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2964 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2965 filename, start_time);
2968 /* make room for the data */
2969 if (gmx::index(s->ndu_alloc) < s->ndu + blk->sub[2].nr)
2971 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2972 blk->sub[2].nr*2 : s->ndu_alloc;
2973 srenew(s->du_alloc, s->ndu_alloc);
2974 s->du = s->du_alloc;
2977 s->ndu += blk->sub[2].nr;
2978 s->ntot += blk->sub[2].nr;
2979 *ndu = blk->sub[2].nr;
2981 /* and copy the data*/
2982 for (j = 0; j < blk->sub[2].nr; j++)
2984 if (blk->sub[2].type == xdr_datatype_float)
2986 s->du[startj+j] = blk->sub[2].fval[j];
2990 s->du[startj+j] = blk->sub[2].dval[j];
2993 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2995 *last_t = start_time + blk->sub[2].nr*delta_time;
2999 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3000 double start_time, double delta_time,
3001 lambda_vec_t *native_lambda, double temp,
3002 double *last_t, const char *filename)
3007 lambda_vec_t *foreign_lambda;
3011 /* check the block types etc. */
3012 if ( (blk->nsub < 2) ||
3013 (blk->sub[0].type != xdr_datatype_double) ||
3014 (blk->sub[1].type != xdr_datatype_int64) ||
3015 (blk->sub[0].nr < 2) ||
3016 (blk->sub[1].nr < 2) )
3019 "Unexpected/corrupted block data in file %s around time %f",
3020 filename, start_time);
3023 nhist = blk->nsub-2;
3031 "Unexpected/corrupted block data in file %s around time %f",
3032 filename, start_time);
3038 snew(foreign_lambda, 1);
3039 lambda_vec_init(foreign_lambda, native_lambda->lc);
3040 lambda_vec_copy(foreign_lambda, native_lambda);
3041 type = static_cast<int>(blk->sub[1].lval[1]);
3044 double old_foreign_lambda;
3046 old_foreign_lambda = blk->sub[0].dval[0];
3047 if (old_foreign_lambda >= 0)
3049 foreign_lambda->val[0] = old_foreign_lambda;
3050 if (foreign_lambda->lc->N > 1)
3053 "Single-component lambda in multi-component file %s",
3059 for (i = 0; i < native_lambda->lc->N; i++)
3061 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3067 if (foreign_lambda->lc->N > 1)
3069 if (blk->sub[1].nr < 3 + nhist)
3072 "Missing derivative coord in multi-component file %s",
3075 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3079 foreign_lambda->dhdl = 0;
3083 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3087 for (i = 0; i < nhist; i++)
3089 nbins[i] = blk->sub[i+2].nr;
3092 hist_init(s->hist, nhist, nbins);
3094 for (i = 0; i < nhist; i++)
3096 s->hist->x0[i] = blk->sub[1].lval[2+i];
3097 s->hist->dx[i] = blk->sub[0].dval[1];
3100 s->hist->dx[i] = -s->hist->dx[i];
3104 s->hist->start_time = start_time;
3105 s->hist->delta_time = delta_time;
3106 s->start_time = start_time;
3107 s->delta_time = delta_time;
3109 for (i = 0; i < nhist; i++)
3111 gmx_int64_t sum = 0;
3113 for (j = 0; j < s->hist->nbin[i]; j++)
3115 int binv = static_cast<int>(blk->sub[i+2].ival[j]);
3117 s->hist->bin[i][j] = binv;
3130 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3136 if (start_time + s->hist->sum*delta_time > *last_t)
3138 *last_t = start_time + s->hist->sum*delta_time;
3144 static void read_barsim_edr(const char *fn, real *temp, sim_data_t *sd)
3150 gmx_enxnm_t *enm = nullptr;
3151 double first_t = -1;
3153 samples_t **samples_rawdh = nullptr; /* contains samples for raw delta_h */
3154 int *nhists = nullptr; /* array to keep count & print at end */
3155 int *npts = nullptr; /* array to keep count & print at end */
3156 lambda_vec_t **lambdas = nullptr; /* array to keep count & print at end */
3157 lambda_vec_t *native_lambda;
3159 lambda_vec_t start_lambda;
3161 fp = open_enx(fn, "r");
3162 do_enxnms(fp, &nre, &enm);
3165 snew(native_lambda, 1);
3166 start_lambda.lc = nullptr;
3167 start_lambda.val = nullptr;
3169 while (do_enx(fp, fr))
3171 /* count the data blocks */
3172 int nblocks_raw = 0;
3173 int nblocks_hist = 0;
3176 /* DHCOLL block information: */
3177 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3180 /* count the blocks and handle collection information: */
3181 for (i = 0; i < fr->nblock; i++)
3183 if (fr->block[i].id == enxDHHIST)
3187 if (fr->block[i].id == enxDH)
3191 if (fr->block[i].id == enxDHCOLL)
3194 if ( (fr->block[i].nsub < 1) ||
3195 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3196 (fr->block[i].sub[0].nr < 5))
3198 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3201 /* read the data from the DHCOLL block */
3202 rtemp = fr->block[i].sub[0].dval[0];
3203 start_time = fr->block[i].sub[0].dval[1];
3204 delta_time = fr->block[i].sub[0].dval[2];
3205 old_start_lambda = fr->block[i].sub[0].dval[3];
3206 delta_lambda = fr->block[i].sub[0].dval[4];
3208 if (delta_lambda > 0)
3210 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3212 if ( ( *temp != rtemp) && (*temp > 0) )
3214 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3218 if (old_start_lambda >= 0)
3222 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3225 "lambda vector components in %s don't match those previously read",
3231 lambda_components_add(&(sd->lc), "", 0);
3233 if (!start_lambda.lc)
3235 lambda_vec_init(&start_lambda, &(sd->lc));
3237 start_lambda.val[0] = old_start_lambda;
3241 /* read lambda vector */
3243 gmx_bool check = (sd->lc.N > 0);
3244 if (fr->block[i].nsub < 2)
3247 "No lambda vector, but start_lambda=%f\n",
3250 n_lambda_vec = fr->block[i].sub[1].ival[1];
3251 for (j = 0; j < n_lambda_vec; j++)
3254 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3257 /* check the components */
3258 lambda_components_check(&(sd->lc), j, name,
3263 lambda_components_add(&(sd->lc), name,
3267 lambda_vec_init(&start_lambda, &(sd->lc));
3268 start_lambda.index = fr->block[i].sub[1].ival[0];
3269 for (j = 0; j < n_lambda_vec; j++)
3271 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3276 first_t = start_time;
3283 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3285 if (nblocks_raw > 0 && nblocks_hist > 0)
3287 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3292 /* this is the first round; allocate the associated data
3294 /*native_lambda=start_lambda;*/
3295 lambda_vec_init(native_lambda, &(sd->lc));
3296 lambda_vec_copy(native_lambda, &start_lambda);
3297 nsamples = nblocks_raw+nblocks_hist;
3298 snew(nhists, nsamples);
3299 snew(npts, nsamples);
3300 snew(lambdas, nsamples);
3301 snew(samples_rawdh, nsamples);
3302 for (i = 0; i < nsamples; i++)
3306 lambdas[i] = nullptr;
3307 samples_rawdh[i] = nullptr; /* init to NULL so we know which
3308 ones contain values */
3313 // nsamples > 0 means this is NOT the first iteration
3315 /* check the native lambda */
3316 if (!lambda_vec_same(&start_lambda, native_lambda) )
3318 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3319 fn, native_lambda->val, start_lambda.val, start_time);
3321 /* check the number of samples against the previous number */
3322 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3324 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3325 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3327 /* check whether last iterations's end time matches with
3328 the currrent start time */
3329 if ( (std::abs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3331 /* it didn't. We need to store our samples and reallocate */
3333 for (i = 0; i < nsamples; i++)
3335 // nsamples is always >0 here, so samples_rawdh must be a valid pointer. Unfortunately
3336 // cppcheck does not understand the logic unless the assert is inside the loop, but
3337 // this is not performance-sensitive code.
3338 GMX_RELEASE_ASSERT(samples_rawdh != nullptr, "samples_rawdh==NULL with nsamples>0");
3339 if (samples_rawdh[i])
3341 /* insert it into the existing list */
3342 lambda_data_list_insert_sample(sd->lb,
3344 /* and make sure we'll allocate a new one this time
3346 samples_rawdh[i] = nullptr;
3353 k = 0; /* counter for the lambdas, etc. arrays */
3354 for (i = 0; i < fr->nblock; i++)
3356 if (fr->block[i].id == enxDH)
3358 int type = (fr->block[i].sub[0].ival[0]);
3359 if (type == dhbtDH || type == dhbtDHDL)
3362 read_edr_rawdh_block(&(samples_rawdh[k]),
3365 start_time, delta_time,
3366 native_lambda, rtemp,
3369 if (samples_rawdh[k])
3371 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3376 else if (fr->block[i].id == enxDHHIST)
3378 int type = static_cast<int>(fr->block[i].sub[1].lval[1]);
3379 if (type == dhbtDH || type == dhbtDHDL)
3383 samples_t *s; /* this is where the data will go */
3384 s = read_edr_hist_block(&nb, &(fr->block[i]),
3385 start_time, delta_time,
3386 native_lambda, rtemp,
3391 lambdas[k] = s->foreign_lambda;
3394 /* and insert the new sample immediately */
3395 for (j = 0; j < nb; j++)
3397 lambda_data_list_insert_sample(sd->lb, s+j);
3403 /* Now store all our extant sample collections */
3404 for (i = 0; i < nsamples; i++)
3406 if (samples_rawdh[i])
3408 /* insert it into the existing list */
3409 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3417 lambda_vec_print(native_lambda, buf, FALSE);
3418 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3419 fn, first_t, last_t, buf);
3420 for (i = 0; i < nsamples; i++)
3424 lambda_vec_print(lambdas[i], buf, TRUE);
3427 printf(" %s (%d hists)\n", buf, nhists[i]);
3431 printf(" %s (%d pts)\n", buf, npts[i]);
3443 int gmx_bar(int argc, char *argv[])
3445 static const char *desc[] = {
3446 "[THISMODULE] calculates free energy difference estimates through ",
3447 "Bennett's acceptance ratio method (BAR). It also automatically",
3448 "adds series of individual free energies obtained with BAR into",
3449 "a combined free energy estimate.[PAR]",
3451 "Every individual BAR free energy difference relies on two ",
3452 "simulations at different states: say state A and state B, as",
3453 "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3454 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3455 "average of the Hamiltonian difference of state B given state A and",
3457 "The energy differences to the other state must be calculated",
3458 "explicitly during the simulation. This can be done with",
3459 "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3461 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3462 "Two types of input files are supported:",
3464 " * Files with more than one [IT]y[it]-value. ",
3465 " The files should have columns ",
3466 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3467 " The [GRK]lambda[grk] values are inferred ",
3468 " from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3469 " dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3470 " legends of Delta H",
3471 " * Files with only one [IT]y[it]-value. Using the",
3472 " [TT]-extp[tt] option for these files, it is assumed",
3473 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3474 " Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3475 " The [GRK]lambda[grk] value of the simulation is inferred from the ",
3476 " subtitle (if present), otherwise from a number in the subdirectory ",
3477 " in the file name.",
3480 "The [GRK]lambda[grk] of the simulation is parsed from ",
3481 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3482 "foreign [GRK]lambda[grk] values from the legend containing the ",
3483 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3484 "the legend line containing 'T ='.[PAR]",
3486 "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3487 "These can contain either lists of energy differences (see the ",
3488 "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3489 "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3490 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3491 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3493 "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3494 "the energy difference can also be extrapolated from the ",
3495 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3496 "option, which assumes that the system's Hamiltonian depends linearly",
3497 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3499 "The free energy estimates are determined using BAR with bisection, ",
3500 "with the precision of the output set with [TT]-prec[tt]. ",
3501 "An error estimate taking into account time correlations ",
3502 "is made by splitting the data into blocks and determining ",
3503 "the free energy differences over those blocks and assuming ",
3504 "the blocks are independent. ",
3505 "The final error estimate is determined from the average variance ",
3506 "over 5 blocks. A range of block numbers for error estimation can ",
3507 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3509 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3510 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3511 "samples. [BB]Note[bb] that when aggregating energy ",
3512 "differences/derivatives with different sampling intervals, this is ",
3513 "almost certainly not correct. Usually subsequent energies are ",
3514 "correlated and different time intervals mean different degrees ",
3515 "of correlation between samples.[PAR]",
3517 "The results are split in two parts: the last part contains the final ",
3518 "results in kJ/mol, together with the error estimate for each part ",
3519 "and the total. The first part contains detailed free energy ",
3520 "difference estimates and phase space overlap measures in units of ",
3521 "kT (together with their computed error estimate). The printed ",
3524 " * lam_A: the [GRK]lambda[grk] values for point A.",
3525 " * lam_B: the [GRK]lambda[grk] values for point B.",
3526 " * DG: the free energy estimate.",
3527 " * s_A: an estimate of the relative entropy of B in A.",
3528 " * s_B: an estimate of the relative entropy of A in B.",
3529 " * stdev: an estimate expected per-sample standard deviation.",
3532 "The relative entropy of both states in each other's ensemble can be ",
3533 "interpreted as a measure of phase space overlap: ",
3534 "the relative entropy s_A of the work samples of lambda_B in the ",
3535 "ensemble of lambda_A (and vice versa for s_B), is a ",
3536 "measure of the 'distance' between Boltzmann distributions of ",
3537 "the two states, that goes to zero for identical distributions. See ",
3538 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3540 "The estimate of the expected per-sample standard deviation, as given ",
3541 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3542 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3543 "of the actual statistical error, because it assumes independent samples).[PAR]",
3545 "To get a visual estimate of the phase space overlap, use the ",
3546 "[TT]-oh[tt] option to write series of histograms, together with the ",
3547 "[TT]-nbin[tt] option.[PAR]"
3549 static real begin = 0, end = -1, temp = -1;
3550 int nd = 2, nbmin = 5, nbmax = 5;
3552 gmx_bool use_dhdl = FALSE;
3554 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3555 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3556 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3557 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3558 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3559 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3560 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3561 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3565 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3566 { efEDR, "-g", "ener", ffOPTRDMULT },
3567 { efXVG, "-o", "bar", ffOPTWR },
3568 { efXVG, "-oi", "barint", ffOPTWR },
3569 { efXVG, "-oh", "histogram", ffOPTWR }
3571 #define NFILE asize(fnm)
3574 int nf = 0; /* file counter */
3575 int nfile_tot; /* total number of input files */
3576 sim_data_t sim_data; /* the simulation data */
3577 barres_t *results; /* the results */
3578 int nresults; /* number of results in results array */
3581 double prec, dg_tot;
3583 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3584 char buf[STRLEN], buf2[STRLEN];
3585 char ktformat[STRLEN], sktformat[STRLEN];
3586 char kteformat[STRLEN], skteformat[STRLEN];
3587 gmx_output_env_t *oenv;
3589 gmx_bool result_OK = TRUE, bEE = TRUE;
3591 gmx_bool disc_err = FALSE;
3592 double sum_disc_err = 0.; /* discretization error */
3593 gmx_bool histrange_err = FALSE;
3594 double sum_histrange_err = 0.; /* histogram range error */
3595 double stat_err = 0.; /* statistical error */
3597 if (!parse_common_args(&argc, argv,
3599 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
3604 gmx::ArrayRef<const std::string> xvgFiles = opt2fnsIfOptionSet("-f", NFILE, fnm);
3605 gmx::ArrayRef<const std::string> edrFiles = opt2fnsIfOptionSet("-g", NFILE, fnm);
3607 sim_data_init(&sim_data);
3609 /* make linked list */
3611 lambda_data_init(lb, 0, 0);
3617 nfile_tot = xvgFiles.size() + edrFiles.size();
3621 gmx_fatal(FARGS, "No input files!");
3626 gmx_fatal(FARGS, "Can not have negative number of digits");
3628 prec = std::pow(10.0, static_cast<double>(-nd));
3630 snew(partsum, (nbmax+1)*(nbmax+1));
3633 /* read in all files. First xvg files */
3634 for (const std::string &filenm : xvgFiles)
3636 read_bar_xvg(filenm.c_str(), &temp, &sim_data);
3639 /* then .edr files */
3640 for (const std::string &filenm : edrFiles)
3642 read_barsim_edr(filenm.c_str(), &temp, &sim_data);;
3646 /* fix the times to allow for equilibration */
3647 sim_data_impose_times(&sim_data, begin, end);
3649 if (opt2bSet("-oh", NFILE, fnm))
3651 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3654 /* assemble the output structures from the lambdas */
3655 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3657 sum_disc_err = barres_list_max_disc_err(results, nresults);
3661 printf("\nNo results to calculate.\n");
3665 if (sum_disc_err > prec)
3667 prec = sum_disc_err;
3668 nd = static_cast<int>(std::ceil(-std::log10(prec)));
3669 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3673 /*sprintf(lamformat,"%%6.3f");*/
3674 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3675 /* the format strings of the results in kT */
3676 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3677 sprintf( sktformat, "%%%ds", 6+nd);
3678 /* the format strings of the errors in kT */
3679 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3680 sprintf( skteformat, "%%%ds", 4+nd);
3681 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3682 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3687 if (opt2bSet("-o", NFILE, fnm))
3689 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3690 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3691 "\\lambda", buf, exvggtXYDY, oenv);
3695 if (opt2bSet("-oi", NFILE, fnm))
3697 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3698 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3699 "\\lambda", buf, oenv);
3709 /* first calculate results */
3712 for (f = 0; f < nresults; f++)
3714 /* Determine the free energy difference with a factor of 10
3715 * more accuracy than requested for printing.
3717 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3720 if (results[f].dg_disc_err > prec/10.)
3724 if (results[f].dg_histrange_err > prec/10.)
3726 histrange_err = TRUE;
3730 /* print results in kT */
3733 printf("\nTemperature: %g K\n", temp);
3735 printf("\nDetailed results in kT (see help for explanation):\n\n");
3736 printf("%6s ", " lam_A");
3737 printf("%6s ", " lam_B");
3738 printf(sktformat, "DG ");
3741 printf(skteformat, "+/- ");
3745 printf(skteformat, "disc ");
3749 printf(skteformat, "range ");
3751 printf(sktformat, "s_A ");
3754 printf(skteformat, "+/- " );
3756 printf(sktformat, "s_B ");
3759 printf(skteformat, "+/- " );
3761 printf(sktformat, "stdev ");
3764 printf(skteformat, "+/- ");
3767 for (f = 0; f < nresults; f++)
3769 lambda_vec_print_short(results[f].a->native_lambda, buf);
3771 lambda_vec_print_short(results[f].b->native_lambda, buf);
3773 printf(ktformat, results[f].dg);
3777 printf(kteformat, results[f].dg_err);
3782 printf(kteformat, results[f].dg_disc_err);
3787 printf(kteformat, results[f].dg_histrange_err);
3790 printf(ktformat, results[f].sa);
3794 printf(kteformat, results[f].sa_err);
3797 printf(ktformat, results[f].sb);
3801 printf(kteformat, results[f].sb_err);
3804 printf(ktformat, results[f].dg_stddev);
3808 printf(kteformat, results[f].dg_stddev_err);
3812 /* Check for negative relative entropy with a 95% certainty. */
3813 if (results[f].sa < -2*results[f].sa_err ||
3814 results[f].sb < -2*results[f].sb_err)
3822 printf("\nWARNING: Some of these results violate the Second Law of "
3823 "Thermodynamics: \n"
3824 " This is can be the result of severe undersampling, or "
3826 " there is something wrong with the simulations.\n");
3830 /* final results in kJ/mol */
3831 printf("\n\nFinal results in kJ/mol:\n\n");
3833 for (f = 0; f < nresults; f++)
3838 lambda_vec_print_short(results[f].a->native_lambda, buf);
3839 fprintf(fpi, xvg2format, buf, dg_tot);
3845 lambda_vec_print_intermediate(results[f].a->native_lambda,
3846 results[f].b->native_lambda,
3849 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3853 lambda_vec_print_short(results[f].a->native_lambda, buf);
3854 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3855 printf("%s - %s", buf, buf2);
3858 printf(dgformat, results[f].dg*kT);
3862 printf(dgformat, results[f].dg_err*kT);
3866 printf(" (max. range err. = ");
3867 printf(dgformat, results[f].dg_histrange_err*kT);
3869 sum_histrange_err += results[f].dg_histrange_err*kT;
3873 dg_tot += results[f].dg;
3877 lambda_vec_print_short(results[0].a->native_lambda, buf);
3878 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3879 printf("%s - %s", buf, buf2);
3882 printf(dgformat, dg_tot*kT);
3885 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3887 printf(dgformat, std::max(std::max(stat_err, sum_disc_err), sum_histrange_err));
3892 printf("\nmaximum discretization error = ");
3893 printf(dgformat, sum_disc_err);
3894 if (bEE && stat_err < sum_disc_err)
3896 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3901 printf("\nmaximum histogram range error = ");
3902 printf(dgformat, sum_histrange_err);
3903 if (bEE && stat_err < sum_histrange_err)
3905 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3914 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3915 fprintf(fpi, xvg2format, buf, dg_tot);
3923 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3924 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");