2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/legacyheaders/typedefs.h"
46 #include "gromacs/utility/smalloc.h"
47 #include "gromacs/utility/futil.h"
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/legacyheaders/macros.h"
50 #include "gromacs/fileio/enxio.h"
51 #include "gromacs/math/units.h"
52 #include "gromacs/utility/fatalerror.h"
53 #include "gromacs/fileio/xvgr.h"
54 #include "gromacs/legacyheaders/viewit.h"
56 #include "gromacs/math/utilities.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/legacyheaders/names.h"
59 #include "gromacs/legacyheaders/mdebin.h"
62 /* Structure for the names of lambda vector components */
63 typedef struct lambda_components_t
65 char **names; /* Array of strings with names for the lambda vector
67 int N; /* The number of components */
68 int Nalloc; /* The number of allocated components */
69 } lambda_components_t;
71 /* Structure for a lambda vector or a dhdl derivative direction */
72 typedef struct lambda_vec_t
74 double *val; /* The lambda vector component values. Only valid if
76 int dhdl; /* The coordinate index for the derivative described by this
78 const lambda_components_t *lc; /* the associated lambda_components
80 int index; /* The state number (init-lambda-state) of this lambda
81 vector, if known. If not, it is set to -1 */
84 /* the dhdl.xvg data from a simulation */
88 int ftp; /* file type */
89 int nset; /* number of lambdas, including dhdl */
90 int *np; /* number of data points (du or hists) per lambda */
91 int np_alloc; /* number of points (du or hists) allocated */
92 double temp; /* temperature */
93 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
94 double *t; /* the times (of second index for y) */
95 double **y; /* the dU values. y[0] holds the derivative, while
96 further ones contain the energy differences between
97 the native lambda and the 'foreign' lambdas. */
98 lambda_vec_t native_lambda; /* the native lambda */
100 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
104 typedef struct hist_t
106 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
107 double dx[2]; /* the histogram spacing. The reverse
108 dx is the negative of the forward dx.*/
109 gmx_int64_t x0[2]; /* the (forward + reverse) histogram start
112 int nbin[2]; /* the (forward+reverse) number of bins */
113 gmx_int64_t sum; /* the total number of counts. Must be
114 the same for forward + reverse. */
115 int nhist; /* number of hist datas (forward or reverse) */
117 double start_time, delta_time; /* start time, end time of histogram */
121 /* an aggregate of samples for partial free energy calculation */
122 typedef struct samples_t
124 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
125 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
126 double temp; /* the temperature */
127 gmx_bool derivative; /* whether this sample is a derivative */
129 /* The samples come either as either delta U lists: */
130 int ndu; /* the number of delta U samples */
131 double *du; /* the delta u's */
132 double *t; /* the times associated with those samples, or: */
133 double start_time, delta_time; /*start time and delta time for linear time*/
135 /* or as histograms: */
136 hist_t *hist; /* a histogram */
138 /* allocation data: (not NULL for data 'owned' by this struct) */
139 double *du_alloc, *t_alloc; /* allocated delta u arrays */
140 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
141 hist_t *hist_alloc; /* allocated hist */
143 gmx_int64_t ntot; /* total number of samples */
144 const char *filename; /* the file name this sample comes from */
147 /* a sample range (start to end for du-style data, or boolean
148 for both du-style data and histograms */
149 typedef struct sample_range_t
151 int start, end; /* start and end index for du style data */
152 gmx_bool use; /* whether to use this sample */
154 samples_t *s; /* the samples this range belongs to */
158 /* a collection of samples for a partial free energy calculation
159 (i.e. the collection of samples from one native lambda to one
161 typedef struct sample_coll_t
163 lambda_vec_t *native_lambda; /* these should be the same for all samples
165 lambda_vec_t *foreign_lambda; /* collection */
166 double temp; /* the temperature */
168 int nsamples; /* the number of samples */
169 samples_t **s; /* the samples themselves */
170 sample_range_t *r; /* the sample ranges */
171 int nsamples_alloc; /* number of allocated samples */
173 gmx_int64_t ntot; /* total number of samples in the ranges of
176 struct sample_coll_t *next, *prev; /* next and previous in the list */
179 /* all the samples associated with a lambda point */
180 typedef struct lambda_data_t
182 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
183 double temp; /* temperature */
185 sample_coll_t *sc; /* the samples */
187 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
189 struct lambda_data_t *next, *prev; /* the next and prev in the list */
192 /* Top-level data structure of simulation data */
193 typedef struct sim_data_t
195 lambda_data_t *lb; /* a lambda data linked list */
196 lambda_data_t lb_head; /* The head element of the linked list */
198 lambda_components_t lc; /* the allowed components of the lambda
202 /* Top-level data structure with calculated values. */
204 sample_coll_t *a, *b; /* the simulation data */
206 double dg; /* the free energy difference */
207 double dg_err; /* the free energy difference */
209 double dg_disc_err; /* discretization error */
210 double dg_histrange_err; /* histogram range error */
212 double sa; /* relative entropy of b in state a */
213 double sa_err; /* error in sa */
214 double sb; /* relative entropy of a in state b */
215 double sb_err; /* error in sb */
217 double dg_stddev; /* expected dg stddev per sample */
218 double dg_stddev_err; /* error in dg_stddev */
222 /* Initialize a lambda_components structure */
223 static void lambda_components_init(lambda_components_t *lc)
227 snew(lc->names, lc->Nalloc);
230 /* Add a component to a lambda_components structure */
231 static void lambda_components_add(lambda_components_t *lc,
232 const char *name, size_t name_length)
234 while (lc->N + 1 > lc->Nalloc)
236 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
237 srenew(lc->names, lc->Nalloc);
239 snew(lc->names[lc->N], name_length+1);
240 strncpy(lc->names[lc->N], name, name_length);
244 /* check whether a component with index 'index' matches the given name, or
245 is also NULL. Returns TRUE if this is the case.
246 the string name does not need to end */
247 static gmx_bool lambda_components_check(const lambda_components_t *lc,
257 if (name == NULL && lc->names[index] == NULL)
261 if ((name == NULL) != (lc->names[index] == NULL))
265 len = strlen(lc->names[index]);
266 if (len != name_length)
270 if (strncmp(lc->names[index], name, name_length) == 0)
277 /* Find the index of a given lambda component name, or -1 if not found */
278 static int lambda_components_find(const lambda_components_t *lc,
284 for (i = 0; i < lc->N; i++)
286 if (strncmp(lc->names[i], name, name_length) == 0)
296 /* initialize a lambda vector */
297 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
299 snew(lv->val, lc->N);
305 static void lambda_vec_destroy(lambda_vec_t *lv)
310 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
314 lambda_vec_init(lv, orig->lc);
315 lv->dhdl = orig->dhdl;
316 lv->index = orig->index;
317 for (i = 0; i < lv->lc->N; i++)
319 lv->val[i] = orig->val[i];
323 /* write a lambda vec to a preallocated string */
324 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
329 str[0] = 0; /* reset the string */
334 str += sprintf(str, "delta H to ");
338 str += sprintf(str, "(");
340 for (i = 0; i < lv->lc->N; i++)
342 str += sprintf(str, "%g", lv->val[i]);
345 str += sprintf(str, ", ");
350 str += sprintf(str, ")");
355 /* this lambda vector describes a derivative */
356 str += sprintf(str, "dH/dl");
357 if (strlen(lv->lc->names[lv->dhdl]) > 0)
359 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
364 /* write a shortened version of the lambda vec to a preallocated string */
365 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
372 sprintf(str, "%6d", lv->index);
378 sprintf(str, "%6.3f", lv->val[0]);
382 sprintf(str, "dH/dl[%d]", lv->dhdl);
387 /* write an intermediate version of two lambda vecs to a preallocated string */
388 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
389 const lambda_vec_t *b, char *str)
395 if ( (a->index >= 0) && (b->index >= 0) )
397 sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
401 if ( (a->dhdl < 0) && (b->dhdl < 0) )
403 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
410 /* calculate the difference in lambda vectors: c = a-b.
411 c must be initialized already, and a and b must describe non-derivative
413 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
418 if ( (a->dhdl > 0) || (b->dhdl > 0) )
421 "Trying to calculate the difference between derivatives instead of lambda points");
423 if ((a->lc != b->lc) || (a->lc != c->lc) )
426 "Trying to calculate the difference lambdas with differing basis set");
428 for (i = 0; i < a->lc->N; i++)
430 c->val[i] = a->val[i] - b->val[i];
434 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
435 a and b must describe non-derivative lambda points */
436 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
441 if ( (a->dhdl > 0) || (b->dhdl > 0) )
444 "Trying to calculate the difference between derivatives instead of lambda points");
449 "Trying to calculate the difference lambdas with differing basis set");
451 for (i = 0; i < a->lc->N; i++)
453 double df = a->val[i] - b->val[i];
460 /* check whether two lambda vectors are the same */
461 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
471 for (i = 0; i < a->lc->N; i++)
473 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
482 /* they're derivatives, so we check whether the indices match */
483 return (a->dhdl == b->dhdl);
487 /* Compare the sort order of two foreign lambda vectors
489 returns 1 if a is 'bigger' than b,
490 returns 0 if they're the same,
491 returns -1 if a is 'smaller' than b.*/
492 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
493 const lambda_vec_t *b)
496 double norm_a = 0, norm_b = 0;
497 gmx_bool different = FALSE;
501 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
503 /* if either one has an index we sort based on that */
504 if ((a->index >= 0) || (b->index >= 0))
506 if (a->index == b->index)
510 return (a->index > b->index) ? 1 : -1;
512 if (a->dhdl >= 0 || b->dhdl >= 0)
514 /* lambda vectors that are derivatives always sort higher than those
515 without derivatives */
516 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
518 return (a->dhdl >= 0) ? 1 : -1;
520 return a->dhdl > b->dhdl;
523 /* neither has an index, so we can only sort on the lambda components,
524 which is only valid if there is one component */
525 for (i = 0; i < a->lc->N; i++)
527 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
531 norm_a += a->val[i]*a->val[i];
532 norm_b += b->val[i]*b->val[i];
538 return norm_a > norm_b;
541 /* Compare the sort order of two native lambda vectors
543 returns 1 if a is 'bigger' than b,
544 returns 0 if they're the same,
545 returns -1 if a is 'smaller' than b.*/
546 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
547 const lambda_vec_t *b)
553 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
555 /* if either one has an index we sort based on that */
556 if ((a->index >= 0) || (b->index >= 0))
558 if (a->index == b->index)
562 return (a->index > b->index) ? 1 : -1;
564 /* neither has an index, so we can only sort on the lambda components,
565 which is only valid if there is one component */
569 "Can't compare lambdas with no index and > 1 component");
571 if (a->dhdl >= 0 || b->dhdl >= 0)
574 "Can't compare native lambdas that are derivatives");
576 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
580 return a->val[0] > b->val[0] ? 1 : -1;
586 static void hist_init(hist_t *h, int nhist, int *nbin)
591 gmx_fatal(FARGS, "histogram with more than two sets of data!");
593 for (i = 0; i < nhist; i++)
595 snew(h->bin[i], nbin[i]);
597 h->nbin[i] = nbin[i];
598 h->start_time = h->delta_time = 0;
605 static void hist_destroy(hist_t *h)
611 static void xvg_init(xvg_t *ba)
620 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
621 lambda_vec_t *foreign_lambda, double temp,
622 gmx_bool derivative, const char *filename)
624 s->native_lambda = native_lambda;
625 s->foreign_lambda = foreign_lambda;
627 s->derivative = derivative;
632 s->start_time = s->delta_time = 0;
636 s->hist_alloc = NULL;
641 s->filename = filename;
644 static void sample_range_init(sample_range_t *r, samples_t *s)
652 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
653 lambda_vec_t *foreign_lambda, double temp)
655 sc->native_lambda = native_lambda;
656 sc->foreign_lambda = foreign_lambda;
662 sc->nsamples_alloc = 0;
665 sc->next = sc->prev = NULL;
668 static void sample_coll_destroy(sample_coll_t *sc)
670 /* don't free the samples themselves */
676 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
679 l->lambda = native_lambda;
685 l->sc = &(l->sc_head);
687 sample_coll_init(l->sc, native_lambda, NULL, 0.);
692 static void barres_init(barres_t *br)
701 br->dg_stddev_err = 0;
708 /* calculate the total number of samples in a sample collection */
709 static void sample_coll_calc_ntot(sample_coll_t *sc)
714 for (i = 0; i < sc->nsamples; i++)
720 sc->ntot += sc->s[i]->ntot;
724 sc->ntot += sc->r[i].end - sc->r[i].start;
731 /* find the barsamples_t associated with a lambda that corresponds to
732 a specific foreign lambda */
733 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
734 lambda_vec_t *foreign_lambda)
736 sample_coll_t *sc = l->sc->next;
740 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
750 /* insert li into an ordered list of lambda_colls */
751 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
753 sample_coll_t *scn = l->sc->next;
754 while ( (scn != l->sc) )
756 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
762 /* now insert it before the found scn */
764 sc->prev = scn->prev;
765 scn->prev->next = sc;
769 /* insert li into an ordered list of lambdas */
770 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
772 lambda_data_t *lc = head->next;
775 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
781 /* now insert ourselves before the found lc */
788 /* insert a sample and a sample_range into a sample_coll. The
789 samples are stored as a pointer, the range is copied. */
790 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
793 /* first check if it belongs here */
794 if (sc->temp != s->temp)
796 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
797 s->filename, sc->next->s[0]->filename);
799 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
801 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
802 s->filename, sc->next->s[0]->filename);
804 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
806 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
807 s->filename, sc->next->s[0]->filename);
810 /* check if there's room */
811 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
813 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
814 srenew(sc->s, sc->nsamples_alloc);
815 srenew(sc->r, sc->nsamples_alloc);
817 sc->s[sc->nsamples] = s;
818 sc->r[sc->nsamples] = *r;
821 sample_coll_calc_ntot(sc);
824 /* insert a sample into a lambda_list, creating the right sample_coll if
826 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
828 gmx_bool found = FALSE;
832 lambda_data_t *l = head->next;
834 /* first search for the right lambda_data_t */
837 if (lambda_vec_same(l->lambda, s->native_lambda) )
847 snew(l, 1); /* allocate a new one */
848 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
849 lambda_data_insert_lambda(head, l); /* add it to the list */
852 /* now look for a sample collection */
853 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
856 snew(sc, 1); /* allocate a new one */
857 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
858 lambda_data_insert_sample_coll(l, sc);
861 /* now insert the samples into the sample coll */
862 sample_range_init(&r, s);
863 sample_coll_insert_sample(sc, s, &r);
867 /* make a histogram out of a sample collection */
868 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
869 int *nbin_alloc, int *nbin,
870 double *dx, double *xmin, int nbin_default)
873 gmx_bool dx_set = FALSE;
874 gmx_bool xmin_set = FALSE;
876 gmx_bool xmax_set = FALSE;
877 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
878 limits of a histogram */
881 /* first determine dx and xmin; try the histograms */
882 for (i = 0; i < sc->nsamples; i++)
886 hist_t *hist = sc->s[i]->hist;
887 for (k = 0; k < hist->nhist; k++)
889 double hdx = hist->dx[k];
890 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
892 /* we use the biggest dx*/
893 if ( (!dx_set) || hist->dx[0] > *dx)
898 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
901 *xmin = (hist->x0[k]*hdx);
904 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
908 if (hist->bin[k][hist->nbin[k]-1] != 0)
910 xmax_set_hard = TRUE;
913 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
915 xmax_set_hard = TRUE;
921 /* and the delta us */
922 for (i = 0; i < sc->nsamples; i++)
924 if (sc->s[i]->ndu > 0)
926 /* determine min and max */
927 int starti = sc->r[i].start;
928 int endi = sc->r[i].end;
929 double du_xmin = sc->s[i]->du[starti];
930 double du_xmax = sc->s[i]->du[starti];
931 for (j = starti+1; j < endi; j++)
933 if (sc->s[i]->du[j] < du_xmin)
935 du_xmin = sc->s[i]->du[j];
937 if (sc->s[i]->du[j] > du_xmax)
939 du_xmax = sc->s[i]->du[j];
943 /* and now change the limits */
944 if ( (!xmin_set) || (du_xmin < *xmin) )
949 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
957 if (!xmax_set || !xmin_set)
966 *nbin = nbin_default;
967 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
968 be 0, and we count from 0 */
972 *nbin = (xmax-(*xmin))/(*dx);
975 if (*nbin > *nbin_alloc)
978 srenew(*bin, *nbin_alloc);
981 /* reset the histogram */
982 for (i = 0; i < (*nbin); i++)
987 /* now add the actual data */
988 for (i = 0; i < sc->nsamples; i++)
992 hist_t *hist = sc->s[i]->hist;
993 for (k = 0; k < hist->nhist; k++)
995 double hdx = hist->dx[k];
996 double xmin_hist = hist->x0[k]*hdx;
997 for (j = 0; j < hist->nbin[k]; j++)
999 /* calculate the bin corresponding to the middle of the
1001 double x = hdx*(j+0.5) + xmin_hist;
1002 int binnr = (int)((x-(*xmin))/(*dx));
1004 if (binnr >= *nbin || binnr < 0)
1009 (*bin)[binnr] += hist->bin[k][j];
1015 int starti = sc->r[i].start;
1016 int endi = sc->r[i].end;
1017 for (j = starti; j < endi; j++)
1019 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1020 if (binnr >= *nbin || binnr < 0)
1031 /* write a collection of histograms to a file */
1032 void sim_data_histogram(sim_data_t *sd, const char *filename,
1033 int nbin_default, const output_env_t oenv)
1035 char label_x[STRLEN];
1036 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1037 const char *title = "N(\\DeltaH)";
1038 const char *label_y = "Samples";
1042 char **setnames = NULL;
1043 gmx_bool first_set = FALSE;
1044 /* histogram data: */
1051 lambda_data_t *bl_head = sd->lb;
1053 printf("\nWriting histogram to %s\n", filename);
1054 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1056 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1058 /* first get all the set names */
1060 /* iterate over all lambdas */
1061 while (bl != bl_head)
1063 sample_coll_t *sc = bl->sc->next;
1065 /* iterate over all samples */
1066 while (sc != bl->sc)
1068 char buf[STRLEN], buf2[STRLEN];
1071 srenew(setnames, nsets);
1072 snew(setnames[nsets-1], STRLEN);
1073 if (sc->foreign_lambda->dhdl < 0)
1075 lambda_vec_print(sc->native_lambda, buf, FALSE);
1076 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1077 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1078 deltag, lambda, buf2, lambda, buf);
1082 lambda_vec_print(sc->native_lambda, buf, FALSE);
1083 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1091 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1094 /* now make the histograms */
1096 /* iterate over all lambdas */
1097 while (bl != bl_head)
1099 sample_coll_t *sc = bl->sc->next;
1101 /* iterate over all samples */
1102 while (sc != bl->sc)
1106 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1109 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1112 for (i = 0; i < nbin; i++)
1114 double xmin = i*dx + min;
1115 double xmax = (i+1)*dx + min;
1117 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1136 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1140 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1141 if (lambda->index >= 0)
1143 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1145 if (lambda->dhdl >= 0)
1147 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1152 for (i = 0; i < lambda->lc->N; i++)
1154 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1160 /* create a collection (array) of barres_t object given a ordered linked list
1161 of barlamda_t sample collections */
1162 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1169 gmx_bool dhdl = FALSE;
1170 gmx_bool first = TRUE;
1171 lambda_data_t *bl_head = sd->lb;
1173 /* first count the lambdas */
1175 while (bl != bl_head)
1180 snew(res, nlambda-1);
1182 /* next put the right samples in the res */
1184 bl = bl_head->next->next; /* we start with the second one. */
1185 while (bl != bl_head)
1187 sample_coll_t *sc, *scprev;
1188 barres_t *br = &(res[*nres]);
1189 /* there is always a previous one. we search for that as a foreign
1191 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1192 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1200 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1201 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1205 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1210 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1213 else if (!scprev && !sc)
1215 char descX[STRLEN], descY[STRLEN];
1216 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1217 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1219 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1222 /* normal delta H */
1225 char descX[STRLEN], descY[STRLEN];
1226 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1227 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1228 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1232 char descX[STRLEN], descY[STRLEN];
1233 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1234 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1235 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1247 /* estimate the maximum discretization error */
1248 static double barres_list_max_disc_err(barres_t *res, int nres)
1251 double disc_err = 0.;
1252 double delta_lambda;
1254 for (i = 0; i < nres; i++)
1256 barres_t *br = &(res[i]);
1258 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1259 br->a->native_lambda);
1261 for (j = 0; j < br->a->nsamples; j++)
1263 if (br->a->s[j]->hist)
1266 if (br->a->s[j]->derivative)
1268 Wfac = delta_lambda;
1271 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1274 for (j = 0; j < br->b->nsamples; j++)
1276 if (br->b->s[j]->hist)
1279 if (br->b->s[j]->derivative)
1281 Wfac = delta_lambda;
1283 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1291 /* impose start and end times on a sample collection, updating sample_ranges */
1292 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1296 for (i = 0; i < sc->nsamples; i++)
1298 samples_t *s = sc->s[i];
1299 sample_range_t *r = &(sc->r[i]);
1302 double end_time = s->hist->delta_time*s->hist->sum +
1303 s->hist->start_time;
1304 if (s->hist->start_time < begin_t || end_time > end_t)
1314 if (s->start_time < begin_t)
1316 r->start = (int)((begin_t - s->start_time)/s->delta_time);
1318 end_time = s->delta_time*s->ndu + s->start_time;
1319 if (end_time > end_t)
1321 r->end = (int)((end_t - s->start_time)/s->delta_time);
1327 for (j = 0; j < s->ndu; j++)
1329 if (s->t[j] < begin_t)
1334 if (s->t[j] >= end_t)
1341 if (r->start > r->end)
1347 sample_coll_calc_ntot(sc);
1350 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1352 double first_t, last_t;
1353 double begin_t, end_t;
1355 lambda_data_t *head = sd->lb;
1358 if (begin <= 0 && end < 0)
1363 /* first determine the global start and end times */
1369 sample_coll_t *sc = lc->sc->next;
1370 while (sc != lc->sc)
1372 for (j = 0; j < sc->nsamples; j++)
1374 double start_t, end_t;
1376 start_t = sc->s[j]->start_time;
1377 end_t = sc->s[j]->start_time;
1380 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1386 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1390 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1394 if (start_t < first_t || first_t < 0)
1408 /* calculate the actual times */
1426 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1428 if (begin_t > end_t)
1432 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1434 /* then impose them */
1438 sample_coll_t *sc = lc->sc->next;
1439 while (sc != lc->sc)
1441 sample_coll_impose_times(sc, begin_t, end_t);
1449 /* create subsample i out of ni from an existing sample_coll */
1450 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1451 sample_coll_t *sc_orig,
1455 int hist_start, hist_end;
1457 gmx_int64_t ntot_start;
1458 gmx_int64_t ntot_end;
1459 gmx_int64_t ntot_so_far;
1461 *sc = *sc_orig; /* just copy all fields */
1463 /* allocate proprietary memory */
1464 snew(sc->s, sc_orig->nsamples);
1465 snew(sc->r, sc_orig->nsamples);
1467 /* copy the samples */
1468 for (j = 0; j < sc_orig->nsamples; j++)
1470 sc->s[j] = sc_orig->s[j];
1471 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1474 /* now fix start and end fields */
1475 /* the casts avoid possible overflows */
1476 ntot_start = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1477 ntot_end = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1479 for (j = 0; j < sc->nsamples; j++)
1481 gmx_int64_t ntot_add;
1482 gmx_int64_t new_start, new_end;
1488 ntot_add = sc->s[j]->hist->sum;
1492 ntot_add = sc->r[j].end - sc->r[j].start;
1500 if (!sc->s[j]->hist)
1502 if (ntot_so_far < ntot_start)
1504 /* adjust starting point */
1505 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1509 new_start = sc->r[j].start;
1511 /* adjust end point */
1512 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1513 if (new_end > sc->r[j].end)
1515 new_end = sc->r[j].end;
1518 /* check if we're in range at all */
1519 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1524 /* and write the new range */
1525 sc->r[j].start = (int)new_start;
1526 sc->r[j].end = (int)new_end;
1533 double ntot_start_norm, ntot_end_norm;
1534 /* calculate the amount of overlap of the
1535 desired range (ntot_start -- ntot_end) onto
1536 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1538 /* first calculate normalized bounds
1539 (where 0 is the start of the hist range, and 1 the end) */
1540 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1541 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1543 /* now fix the boundaries */
1544 ntot_start_norm = min(1, max(0., ntot_start_norm));
1545 ntot_end_norm = max(0, min(1., ntot_end_norm));
1547 /* and calculate the overlap */
1548 overlap = ntot_end_norm - ntot_start_norm;
1550 if (overlap > 0.95) /* we allow for 5% slack */
1552 sc->r[j].use = TRUE;
1554 else if (overlap < 0.05)
1556 sc->r[j].use = FALSE;
1564 ntot_so_far += ntot_add;
1566 sample_coll_calc_ntot(sc);
1571 /* calculate minimum and maximum work values in sample collection */
1572 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1573 double *Wmin, double *Wmax)
1580 for (i = 0; i < sc->nsamples; i++)
1582 samples_t *s = sc->s[i];
1583 sample_range_t *r = &(sc->r[i]);
1588 for (j = r->start; j < r->end; j++)
1590 *Wmin = min(*Wmin, s->du[j]*Wfac);
1591 *Wmax = max(*Wmax, s->du[j]*Wfac);
1596 int hd = 0; /* determine the histogram direction: */
1598 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1602 dx = s->hist->dx[hd];
1604 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1606 *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1607 *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1608 /* look for the highest value bin with values */
1609 if (s->hist->bin[hd][j] > 0)
1611 *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1612 *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1621 /* Initialize a sim_data structure */
1622 static void sim_data_init(sim_data_t *sd)
1624 /* make linked list */
1625 sd->lb = &(sd->lb_head);
1626 sd->lb->next = sd->lb;
1627 sd->lb->prev = sd->lb;
1629 lambda_components_init(&(sd->lc));
1633 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1640 for (i = 0; i < n; i++)
1642 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1648 /* calculate the BAR average given a histogram
1650 if type== 0, calculate the best estimate for the average,
1651 if type==-1, calculate the minimum possible value given the histogram
1652 if type== 1, calculate the maximum possible value given the histogram */
1653 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1659 /* normalization factor multiplied with bin width and
1660 number of samples (we normalize through M): */
1662 int hd = 0; /* determine the histogram direction: */
1665 if ( (hist->nhist > 1) && (Wfac < 0) )
1670 max = hist->nbin[hd]-1;
1673 max = hist->nbin[hd]; /* we also add whatever was out of range */
1676 for (i = 0; i < max; i++)
1678 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1679 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1681 sum += pxdx/(1. + exp(x + sbMmDG));
1687 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1688 double temp, double tol, int type)
1693 double Wfac1, Wfac2, Wmin, Wmax;
1694 double DG0, DG1, DG2, dDG1;
1696 double n1, n2; /* numbers of samples as doubles */
1701 /* count the numbers of samples */
1707 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1708 if (ca->foreign_lambda->dhdl < 0)
1710 /* this is the case when the delta U were calculated directly
1711 (i.e. we're not scaling dhdl) */
1717 /* we're using dhdl, so delta_lambda needs to be a
1718 multiplication factor. */
1719 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1720 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1722 if (cb->native_lambda->lc->N > 1)
1725 "Can't (yet) do multi-component dhdl interpolation");
1728 Wfac1 = beta*delta_lambda;
1729 Wfac2 = -beta*delta_lambda;
1734 /* We print the output both in kT and kJ/mol.
1735 * Here we determine DG in kT, so when beta < 1
1736 * the precision has to be increased.
1741 /* Calculate minimum and maximum work to give an initial estimate of
1742 * delta G as their average.
1745 double Wmin1, Wmin2, Wmax1, Wmax2;
1746 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1747 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1749 Wmin = min(Wmin1, Wmin2);
1750 Wmax = max(Wmax1, Wmax2);
1758 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1760 /* We approximate by bisection: given our initial estimates
1761 we keep checking whether the halfway point is greater or
1762 smaller than what we get out of the BAR averages.
1764 For the comparison we can use twice the tolerance. */
1765 while (DG2 - DG0 > 2*tol)
1767 DG1 = 0.5*(DG0 + DG2);
1769 /* calculate the BAR averages */
1772 for (i = 0; i < ca->nsamples; i++)
1774 samples_t *s = ca->s[i];
1775 sample_range_t *r = &(ca->r[i]);
1780 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1784 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1789 for (i = 0; i < cb->nsamples; i++)
1791 samples_t *s = cb->s[i];
1792 sample_range_t *r = &(cb->r[i]);
1797 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1801 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1817 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1821 return 0.5*(DG0 + DG2);
1824 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1825 double temp, double dg, double *sa, double *sb)
1831 double Wfac1, Wfac2;
1837 /* count the numbers of samples */
1841 /* to ensure the work values are the same as during the delta_G */
1842 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1843 if (ca->foreign_lambda->dhdl < 0)
1845 /* this is the case when the delta U were calculated directly
1846 (i.e. we're not scaling dhdl) */
1852 /* we're using dhdl, so delta_lambda needs to be a
1853 multiplication factor. */
1854 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1856 Wfac1 = beta*delta_lambda;
1857 Wfac2 = -beta*delta_lambda;
1860 /* first calculate the average work in both directions */
1861 for (i = 0; i < ca->nsamples; i++)
1863 samples_t *s = ca->s[i];
1864 sample_range_t *r = &(ca->r[i]);
1869 for (j = r->start; j < r->end; j++)
1871 W_ab += Wfac1*s->du[j];
1876 /* normalization factor multiplied with bin width and
1877 number of samples (we normalize through M): */
1880 int hd = 0; /* histogram direction */
1881 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1885 dx = s->hist->dx[hd];
1887 for (j = 0; j < s->hist->nbin[0]; j++)
1889 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1890 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1898 for (i = 0; i < cb->nsamples; i++)
1900 samples_t *s = cb->s[i];
1901 sample_range_t *r = &(cb->r[i]);
1906 for (j = r->start; j < r->end; j++)
1908 W_ba += Wfac1*s->du[j];
1913 /* normalization factor multiplied with bin width and
1914 number of samples (we normalize through M): */
1917 int hd = 0; /* histogram direction */
1918 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1922 dx = s->hist->dx[hd];
1924 for (j = 0; j < s->hist->nbin[0]; j++)
1926 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1927 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1935 /* then calculate the relative entropies */
1940 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1941 double temp, double dg, double *stddev)
1945 double sigmafact = 0.;
1947 double Wfac1, Wfac2;
1953 /* count the numbers of samples */
1957 /* to ensure the work values are the same as during the delta_G */
1958 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1959 if (ca->foreign_lambda->dhdl < 0)
1961 /* this is the case when the delta U were calculated directly
1962 (i.e. we're not scaling dhdl) */
1968 /* we're using dhdl, so delta_lambda needs to be a
1969 multiplication factor. */
1970 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1972 Wfac1 = beta*delta_lambda;
1973 Wfac2 = -beta*delta_lambda;
1979 /* calculate average in both directions */
1980 for (i = 0; i < ca->nsamples; i++)
1982 samples_t *s = ca->s[i];
1983 sample_range_t *r = &(ca->r[i]);
1988 for (j = r->start; j < r->end; j++)
1990 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1995 /* normalization factor multiplied with bin width and
1996 number of samples (we normalize through M): */
1999 int hd = 0; /* histogram direction */
2000 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
2004 dx = s->hist->dx[hd];
2006 for (j = 0; j < s->hist->nbin[0]; j++)
2008 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2009 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2011 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
2016 for (i = 0; i < cb->nsamples; i++)
2018 samples_t *s = cb->s[i];
2019 sample_range_t *r = &(cb->r[i]);
2024 for (j = r->start; j < r->end; j++)
2026 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2031 /* normalization factor multiplied with bin width and
2032 number of samples (we normalize through M): */
2035 int hd = 0; /* histogram direction */
2036 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2040 dx = s->hist->dx[hd];
2042 for (j = 0; j < s->hist->nbin[0]; j++)
2044 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2045 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2047 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2053 sigmafact /= (n1 + n2);
2057 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2058 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2063 static void calc_bar(barres_t *br, double tol,
2064 int npee_min, int npee_max, gmx_bool *bEE,
2068 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2069 for calculated quantities */
2070 int nsample1, nsample2;
2071 double temp = br->a->temp;
2073 double dg_min, dg_max;
2074 gmx_bool have_hist = FALSE;
2076 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2078 br->dg_disc_err = 0.;
2079 br->dg_histrange_err = 0.;
2081 /* check if there are histograms */
2082 for (i = 0; i < br->a->nsamples; i++)
2084 if (br->a->r[i].use && br->a->s[i]->hist)
2092 for (i = 0; i < br->b->nsamples; i++)
2094 if (br->b->r[i].use && br->b->s[i]->hist)
2102 /* calculate histogram-specific errors */
2105 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2106 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2108 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2110 /* the histogram range error is the biggest of the differences
2111 between the best estimate and the extremes */
2112 br->dg_histrange_err = fabs(dg_max - dg_min);
2114 br->dg_disc_err = 0.;
2115 for (i = 0; i < br->a->nsamples; i++)
2117 if (br->a->s[i]->hist)
2119 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2122 for (i = 0; i < br->b->nsamples; i++)
2124 if (br->b->s[i]->hist)
2126 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2130 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2132 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2141 sample_coll_t ca, cb;
2143 /* initialize the samples */
2144 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2146 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2149 for (npee = npee_min; npee <= npee_max; npee++)
2158 double dstddev2 = 0;
2161 for (p = 0; p < npee; p++)
2168 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2169 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2173 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2177 sample_coll_destroy(&ca);
2181 sample_coll_destroy(&cb);
2186 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2190 partsum[npee*(npee_max+1)+p] += dgp;
2192 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2197 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2200 dstddev2 += stddevc*stddevc;
2202 sample_coll_destroy(&ca);
2203 sample_coll_destroy(&cb);
2207 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2213 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2214 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2218 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2220 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
2221 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
2222 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
2223 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2228 static double bar_err(int nbmin, int nbmax, const double *partsum)
2231 double svar, s, s2, dg;
2234 for (nb = nbmin; nb <= nbmax; nb++)
2238 for (b = 0; b < nb; b++)
2240 dg = partsum[nb*(nbmax+1)+b];
2246 svar += (s2 - s*s)/(nb - 1);
2249 return sqrt(svar/(nbmax + 1 - nbmin));
2253 /* Seek the end of an identifier (consecutive non-spaces), followed by
2254 an optional number of spaces or '='-signs. Returns a pointer to the
2255 first non-space value found after that. Returns NULL if the string
2258 static const char *find_value(const char *str)
2260 gmx_bool name_end_found = FALSE;
2262 /* if the string is a NULL pointer, return a NULL pointer. */
2267 while (*str != '\0')
2269 /* first find the end of the name */
2270 if (!name_end_found)
2272 if (isspace(*str) || (*str == '=') )
2274 name_end_found = TRUE;
2279 if (!( isspace(*str) || (*str == '=') ))
2291 /* read a vector-notation description of a lambda vector */
2292 static gmx_bool read_lambda_compvec(const char *str,
2294 const lambda_components_t *lc_in,
2295 lambda_components_t *lc_out,
2299 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2300 components, or to check them */
2301 gmx_bool start_reached = FALSE; /* whether the start of component names
2303 gmx_bool vector = FALSE; /* whether there are multiple components */
2304 int n = 0; /* current component number */
2305 const char *val_start = NULL; /* start of the component name, or NULL
2306 if not in a value */
2316 if (lc_out && lc_out->N == 0)
2318 initialize_lc = TRUE;
2333 start_reached = TRUE;
2336 else if (*str == '(')
2339 start_reached = TRUE;
2341 else if (!isspace(*str))
2343 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2350 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2357 lambda_components_add(lc_out, val_start,
2362 if (!lambda_components_check(lc_out, n, val_start,
2371 /* add a vector component to lv */
2372 lv->val[n] = strtod(val_start, &strtod_end);
2373 if (val_start == strtod_end)
2376 "Error reading lambda vector in %s", fn);
2379 /* reset for the next identifier */
2388 else if (isalnum(*str))
2401 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2409 else if (lv == NULL)
2415 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2435 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2441 /* read and check the component names from a string */
2442 static gmx_bool read_lambda_components(const char *str,
2443 lambda_components_t *lc,
2447 return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2450 /* read an initialized lambda vector from a string */
2451 static gmx_bool read_lambda_vector(const char *str,
2456 return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2461 /* deduce lambda value from legend.
2463 legend = the legend string
2465 lam = the initialized lambda vector
2466 returns whether to use the data in this set.
2468 static gmx_bool legend2lambda(const char *fn,
2473 const char *ptr = NULL, *ptr2 = NULL;
2474 gmx_bool ok = FALSE;
2475 gmx_bool bdhdl = FALSE;
2476 const char *tostr = " to ";
2480 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2483 /* look for the last 'to': */
2487 ptr2 = strstr(ptr2, tostr);
2494 while (ptr2 != NULL && *ptr2 != '\0');
2498 ptr += strlen(tostr)-1; /* and advance past that 'to' */
2502 /* look for the = sign */
2503 ptr = strrchr(legend, '=');
2506 /* otherwise look for the last space */
2507 ptr = strrchr(legend, ' ');
2511 if (strstr(legend, "dH"))
2516 else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2521 else /*if (strstr(legend, "pV"))*/
2532 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2536 ptr = find_value(ptr);
2537 if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2539 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2548 ptr = strrchr(legend, '=');
2552 /* there must be a component name */
2556 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2558 /* now backtrack to the start of the identifier */
2559 while (isspace(*ptr))
2565 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2568 while (!isspace(*ptr))
2573 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2577 strncpy(buf, ptr, (end-ptr));
2578 buf[(end-ptr)] = '\0';
2579 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2583 strncpy(buf, ptr, (end-ptr));
2584 buf[(end-ptr)] = '\0';
2586 "Did not find lambda component for '%s' in %s",
2595 "dhdl without component name with >1 lambda component in %s",
2600 lam->dhdl = dhdl_index;
2605 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2606 lambda_components_t *lc)
2611 double native_lambda;
2615 /* first check for a state string */
2616 ptr = strstr(subtitle, "state");
2620 const char *val_end;
2622 /* the new 4.6 style lambda vectors */
2623 ptr = find_value(ptr);
2626 index = strtol(ptr, &end, 10);
2629 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2636 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2639 /* now find the lambda vector component names */
2640 while (*ptr != '(' && !isalnum(*ptr))
2646 "Incomplete lambda vector component data in %s", fn);
2651 if (!read_lambda_components(ptr, lc, &val_end, fn))
2654 "lambda vector components in %s don't match those previously read",
2657 ptr = find_value(val_end);
2660 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2663 lambda_vec_init(&(ba->native_lambda), lc);
2664 if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2666 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2668 ba->native_lambda.index = index;
2673 /* compatibility mode: check for lambda in other ways. */
2674 /* plain text lambda string */
2675 ptr = strstr(subtitle, "lambda");
2678 /* xmgrace formatted lambda string */
2679 ptr = strstr(subtitle, "\\xl\\f{}");
2683 /* xmgr formatted lambda string */
2684 ptr = strstr(subtitle, "\\8l\\4");
2688 ptr = strstr(ptr, "=");
2692 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2693 /* add the lambda component name as an empty string */
2696 if (!lambda_components_check(lc, 0, "", 0))
2699 "lambda vector components in %s don't match those previously read",
2705 lambda_components_add(lc, "", 0);
2707 lambda_vec_init(&(ba->native_lambda), lc);
2708 ba->native_lambda.val[0] = native_lambda;
2715 static void filename2lambda(const char *fn)
2718 const char *ptr, *digitptr;
2722 /* go to the end of the path string and search backward to find the last
2723 directory in the path which has to contain the value of lambda
2725 while (ptr[1] != '\0')
2729 /* searching backward to find the second directory separator */
2734 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2742 /* save the last position of a digit between the last two
2743 separators = in the last dirname */
2744 if (dirsep > 0 && isdigit(*ptr))
2752 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2753 " last directory in the path '%s' does not contain a number", fn);
2755 if (digitptr[-1] == '-')
2759 lambda = strtod(digitptr, &endptr);
2760 if (endptr == digitptr)
2762 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2766 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2767 lambda_components_t *lc)
2770 char *subtitle, **legend, *ptr;
2772 gmx_bool native_lambda_read = FALSE;
2780 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2783 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2785 /* Reorder the data */
2787 for (i = 1; i < ba->nset; i++)
2789 ba->y[i-1] = ba->y[i];
2793 snew(ba->np, ba->nset);
2794 for (i = 0; i < ba->nset; i++)
2800 if (subtitle != NULL)
2802 /* try to extract temperature */
2803 ptr = strstr(subtitle, "T =");
2807 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2811 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2821 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2826 /* Try to deduce lambda from the subtitle */
2829 if (subtitle2lambda(subtitle, ba, fn, lc))
2831 native_lambda_read = TRUE;
2834 snew(ba->lambda, ba->nset);
2837 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2840 if (!native_lambda_read)
2842 /* Deduce lambda from the file name */
2843 filename2lambda(fn);
2844 native_lambda_read = TRUE;
2846 ba->lambda[0] = ba->native_lambda;
2850 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2855 for (i = 0; i < ba->nset; )
2857 gmx_bool use = FALSE;
2858 /* Read lambda from the legend */
2859 lambda_vec_init( &(ba->lambda[i]), lc );
2860 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2861 use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2864 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2870 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2871 for (j = i+1; j < ba->nset; j++)
2873 ba->y[j-1] = ba->y[j];
2874 legend[j-1] = legend[j];
2881 if (!native_lambda_read)
2883 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2888 for (i = 0; i < ba->nset-1; i++)
2896 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2905 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2907 if (barsim->nset < 1)
2909 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2912 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2914 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2916 *temp = barsim->temp;
2918 /* now create a series of samples_t */
2919 snew(s, barsim->nset);
2920 for (i = 0; i < barsim->nset; i++)
2922 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2923 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2924 &(barsim->lambda[i])),
2926 s[i].du = barsim->y[i];
2927 s[i].ndu = barsim->np[i];
2930 lambda_data_list_insert_sample(sd->lb, s+i);
2935 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2936 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2937 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2938 for (i = 0; i < barsim->nset; i++)
2940 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2941 printf(" %s (%d pts)\n", buf, s[i].ndu);
2947 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2948 double start_time, double delta_time,
2949 lambda_vec_t *native_lambda, double temp,
2950 double *last_t, const char *filename)
2954 double old_foreign_lambda;
2955 lambda_vec_t *foreign_lambda;
2957 samples_t *s; /* convenience pointer */
2960 /* check the block types etc. */
2961 if ( (blk->nsub < 3) ||
2962 (blk->sub[0].type != xdr_datatype_int) ||
2963 (blk->sub[1].type != xdr_datatype_double) ||
2965 (blk->sub[2].type != xdr_datatype_float) &&
2966 (blk->sub[2].type != xdr_datatype_double)
2968 (blk->sub[0].nr < 1) ||
2969 (blk->sub[1].nr < 1) )
2972 "Unexpected/corrupted block data in file %s around time %f.",
2973 filename, start_time);
2976 snew(foreign_lambda, 1);
2977 lambda_vec_init(foreign_lambda, native_lambda->lc);
2978 lambda_vec_copy(foreign_lambda, native_lambda);
2979 type = blk->sub[0].ival[0];
2982 for (i = 0; i < native_lambda->lc->N; i++)
2984 foreign_lambda->val[i] = blk->sub[1].dval[i];
2989 if (blk->sub[0].nr > 1)
2991 foreign_lambda->dhdl = blk->sub[0].ival[1];
2995 foreign_lambda->dhdl = 0;
3001 /* initialize the samples structure if it's empty. */
3003 samples_init(*smp, native_lambda, foreign_lambda, temp,
3004 type == dhbtDHDL, filename);
3005 (*smp)->start_time = start_time;
3006 (*smp)->delta_time = delta_time;
3009 /* set convenience pointer */
3012 /* now double check */
3013 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
3015 char buf[STRLEN], buf2[STRLEN];
3016 lambda_vec_print(foreign_lambda, buf, FALSE);
3017 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
3018 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
3019 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
3020 filename, start_time);
3023 /* make room for the data */
3024 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3026 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3027 blk->sub[2].nr*2 : s->ndu_alloc;
3028 srenew(s->du_alloc, s->ndu_alloc);
3029 s->du = s->du_alloc;
3032 s->ndu += blk->sub[2].nr;
3033 s->ntot += blk->sub[2].nr;
3034 *ndu = blk->sub[2].nr;
3036 /* and copy the data*/
3037 for (j = 0; j < blk->sub[2].nr; j++)
3039 if (blk->sub[2].type == xdr_datatype_float)
3041 s->du[startj+j] = blk->sub[2].fval[j];
3045 s->du[startj+j] = blk->sub[2].dval[j];
3048 if (start_time + blk->sub[2].nr*delta_time > *last_t)
3050 *last_t = start_time + blk->sub[2].nr*delta_time;
3054 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3055 double start_time, double delta_time,
3056 lambda_vec_t *native_lambda, double temp,
3057 double *last_t, const char *filename)
3062 double old_foreign_lambda;
3063 lambda_vec_t *foreign_lambda;
3067 /* check the block types etc. */
3068 if ( (blk->nsub < 2) ||
3069 (blk->sub[0].type != xdr_datatype_double) ||
3070 (blk->sub[1].type != xdr_datatype_int64) ||
3071 (blk->sub[0].nr < 2) ||
3072 (blk->sub[1].nr < 2) )
3075 "Unexpected/corrupted block data in file %s around time %f",
3076 filename, start_time);
3079 nhist = blk->nsub-2;
3087 "Unexpected/corrupted block data in file %s around time %f",
3088 filename, start_time);
3094 snew(foreign_lambda, 1);
3095 lambda_vec_init(foreign_lambda, native_lambda->lc);
3096 lambda_vec_copy(foreign_lambda, native_lambda);
3097 type = (int)(blk->sub[1].lval[1]);
3100 double old_foreign_lambda;
3102 old_foreign_lambda = blk->sub[0].dval[0];
3103 if (old_foreign_lambda >= 0)
3105 foreign_lambda->val[0] = old_foreign_lambda;
3106 if (foreign_lambda->lc->N > 1)
3109 "Single-component lambda in multi-component file %s",
3115 for (i = 0; i < native_lambda->lc->N; i++)
3117 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3123 if (foreign_lambda->lc->N > 1)
3125 if (blk->sub[1].nr < 3 + nhist)
3128 "Missing derivative coord in multi-component file %s",
3131 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3135 foreign_lambda->dhdl = 0;
3139 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3143 for (i = 0; i < nhist; i++)
3145 nbins[i] = blk->sub[i+2].nr;
3148 hist_init(s->hist, nhist, nbins);
3150 for (i = 0; i < nhist; i++)
3152 s->hist->x0[i] = blk->sub[1].lval[2+i];
3153 s->hist->dx[i] = blk->sub[0].dval[1];
3156 s->hist->dx[i] = -s->hist->dx[i];
3160 s->hist->start_time = start_time;
3161 s->hist->delta_time = delta_time;
3162 s->start_time = start_time;
3163 s->delta_time = delta_time;
3165 for (i = 0; i < nhist; i++)
3168 gmx_int64_t sum = 0;
3170 for (j = 0; j < s->hist->nbin[i]; j++)
3172 int binv = (int)(blk->sub[i+2].ival[j]);
3174 s->hist->bin[i][j] = binv;
3187 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3193 if (start_time + s->hist->sum*delta_time > *last_t)
3195 *last_t = start_time + s->hist->sum*delta_time;
3201 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3207 gmx_enxnm_t *enm = NULL;
3208 double first_t = -1;
3210 samples_t **samples_rawdh = NULL; /* contains samples for raw delta_h */
3211 int *nhists = NULL; /* array to keep count & print at end */
3212 int *npts = NULL; /* array to keep count & print at end */
3213 lambda_vec_t **lambdas = NULL; /* array to keep count & print at end */
3214 lambda_vec_t *native_lambda;
3215 double end_time; /* the end time of the last batch of samples */
3217 lambda_vec_t start_lambda;
3219 fp = open_enx(fn, "r");
3220 do_enxnms(fp, &nre, &enm);
3223 snew(native_lambda, 1);
3224 start_lambda.lc = NULL;
3226 while (do_enx(fp, fr))
3228 /* count the data blocks */
3229 int nblocks_raw = 0;
3230 int nblocks_hist = 0;
3233 /* DHCOLL block information: */
3234 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3237 /* count the blocks and handle collection information: */
3238 for (i = 0; i < fr->nblock; i++)
3240 if (fr->block[i].id == enxDHHIST)
3244 if (fr->block[i].id == enxDH)
3248 if (fr->block[i].id == enxDHCOLL)
3251 if ( (fr->block[i].nsub < 1) ||
3252 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3253 (fr->block[i].sub[0].nr < 5))
3255 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3258 /* read the data from the DHCOLL block */
3259 rtemp = fr->block[i].sub[0].dval[0];
3260 start_time = fr->block[i].sub[0].dval[1];
3261 delta_time = fr->block[i].sub[0].dval[2];
3262 old_start_lambda = fr->block[i].sub[0].dval[3];
3263 delta_lambda = fr->block[i].sub[0].dval[4];
3265 if (delta_lambda > 0)
3267 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3269 if ( ( *temp != rtemp) && (*temp > 0) )
3271 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3275 if (old_start_lambda >= 0)
3279 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3282 "lambda vector components in %s don't match those previously read",
3288 lambda_components_add(&(sd->lc), "", 0);
3290 if (!start_lambda.lc)
3292 lambda_vec_init(&start_lambda, &(sd->lc));
3294 start_lambda.val[0] = old_start_lambda;
3298 /* read lambda vector */
3300 gmx_bool check = (sd->lc.N > 0);
3301 if (fr->block[i].nsub < 2)
3304 "No lambda vector, but start_lambda=%f\n",
3307 n_lambda_vec = fr->block[i].sub[1].ival[1];
3308 for (j = 0; j < n_lambda_vec; j++)
3311 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3314 /* check the components */
3315 lambda_components_check(&(sd->lc), j, name,
3320 lambda_components_add(&(sd->lc), name,
3324 lambda_vec_init(&start_lambda, &(sd->lc));
3325 start_lambda.index = fr->block[i].sub[1].ival[0];
3326 for (j = 0; j < n_lambda_vec; j++)
3328 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3333 first_t = start_time;
3340 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3342 if (nblocks_raw > 0 && nblocks_hist > 0)
3344 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3349 /* check the native lambda */
3350 if (!lambda_vec_same(&start_lambda, native_lambda) )
3352 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3353 fn, native_lambda, start_lambda, start_time);
3355 /* check the number of samples against the previous number */
3356 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3358 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3359 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3361 /* check whether last iterations's end time matches with
3362 the currrent start time */
3363 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3365 /* it didn't. We need to store our samples and reallocate */
3366 for (i = 0; i < nsamples; i++)
3368 if (samples_rawdh[i])
3370 /* insert it into the existing list */
3371 lambda_data_list_insert_sample(sd->lb,
3373 /* and make sure we'll allocate a new one this time
3375 samples_rawdh[i] = NULL;
3382 /* this is the first round; allocate the associated data
3384 /*native_lambda=start_lambda;*/
3385 lambda_vec_init(native_lambda, &(sd->lc));
3386 lambda_vec_copy(native_lambda, &start_lambda);
3387 nsamples = nblocks_raw+nblocks_hist;
3388 snew(nhists, nsamples);
3389 snew(npts, nsamples);
3390 snew(lambdas, nsamples);
3391 snew(samples_rawdh, nsamples);
3392 for (i = 0; i < nsamples; i++)
3397 samples_rawdh[i] = NULL; /* init to NULL so we know which
3398 ones contain values */
3403 k = 0; /* counter for the lambdas, etc. arrays */
3404 for (i = 0; i < fr->nblock; i++)
3406 if (fr->block[i].id == enxDH)
3408 int type = (fr->block[i].sub[0].ival[0]);
3409 if (type == dhbtDH || type == dhbtDHDL)
3412 read_edr_rawdh_block(&(samples_rawdh[k]),
3415 start_time, delta_time,
3416 native_lambda, rtemp,
3419 if (samples_rawdh[k])
3421 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3426 else if (fr->block[i].id == enxDHHIST)
3428 int type = (int)(fr->block[i].sub[1].lval[1]);
3429 if (type == dhbtDH || type == dhbtDHDL)
3433 samples_t *s; /* this is where the data will go */
3434 s = read_edr_hist_block(&nb, &(fr->block[i]),
3435 start_time, delta_time,
3436 native_lambda, rtemp,
3441 lambdas[k] = s->foreign_lambda;
3444 /* and insert the new sample immediately */
3445 for (j = 0; j < nb; j++)
3447 lambda_data_list_insert_sample(sd->lb, s+j);
3453 /* Now store all our extant sample collections */
3454 for (i = 0; i < nsamples; i++)
3456 if (samples_rawdh[i])
3458 /* insert it into the existing list */
3459 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3467 lambda_vec_print(native_lambda, buf, FALSE);
3468 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3469 fn, first_t, last_t, buf);
3470 for (i = 0; i < nsamples; i++)
3474 lambda_vec_print(lambdas[i], buf, TRUE);
3477 printf(" %s (%d hists)\n", buf, nhists[i]);
3481 printf(" %s (%d pts)\n", buf, npts[i]);
3493 int gmx_bar(int argc, char *argv[])
3495 static const char *desc[] = {
3496 "[THISMODULE] calculates free energy difference estimates through ",
3497 "Bennett's acceptance ratio method (BAR). It also automatically",
3498 "adds series of individual free energies obtained with BAR into",
3499 "a combined free energy estimate.[PAR]",
3501 "Every individual BAR free energy difference relies on two ",
3502 "simulations at different states: say state A and state B, as",
3503 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3504 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3505 "average of the Hamiltonian difference of state B given state A and",
3507 "The energy differences to the other state must be calculated",
3508 "explicitly during the simulation. This can be done with",
3509 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3511 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3512 "Two types of input files are supported:[BR]",
3513 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
3514 "The files should have columns ",
3515 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3516 "The [GRK]lambda[grk] values are inferred ",
3517 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3518 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3519 "legends of Delta H",
3521 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
3522 "[TT]-extp[tt] option for these files, it is assumed",
3523 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3524 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3525 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3526 "subtitle (if present), otherwise from a number in the subdirectory ",
3527 "in the file name.[PAR]",
3529 "The [GRK]lambda[grk] of the simulation is parsed from ",
3530 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3531 "foreign [GRK]lambda[grk] values from the legend containing the ",
3532 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3533 "the legend line containing 'T ='.[PAR]",
3535 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3536 "These can contain either lists of energy differences (see the ",
3537 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3538 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3539 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3540 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3542 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3543 "the energy difference can also be extrapolated from the ",
3544 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3545 "option, which assumes that the system's Hamiltonian depends linearly",
3546 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3548 "The free energy estimates are determined using BAR with bisection, ",
3549 "with the precision of the output set with [TT]-prec[tt]. ",
3550 "An error estimate taking into account time correlations ",
3551 "is made by splitting the data into blocks and determining ",
3552 "the free energy differences over those blocks and assuming ",
3553 "the blocks are independent. ",
3554 "The final error estimate is determined from the average variance ",
3555 "over 5 blocks. A range of block numbers for error estimation can ",
3556 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3558 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3559 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3560 "samples. [BB]Note[bb] that when aggregating energy ",
3561 "differences/derivatives with different sampling intervals, this is ",
3562 "almost certainly not correct. Usually subsequent energies are ",
3563 "correlated and different time intervals mean different degrees ",
3564 "of correlation between samples.[PAR]",
3566 "The results are split in two parts: the last part contains the final ",
3567 "results in kJ/mol, together with the error estimate for each part ",
3568 "and the total. The first part contains detailed free energy ",
3569 "difference estimates and phase space overlap measures in units of ",
3570 "kT (together with their computed error estimate). The printed ",
3572 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3573 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3574 "[TT]*[tt] DG: the free energy estimate.[BR]",
3575 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
3576 "[TT]*[tt] s_B: an estimate of the relative entropy of A in B.[BR]",
3577 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
3579 "The relative entropy of both states in each other's ensemble can be ",
3580 "interpreted as a measure of phase space overlap: ",
3581 "the relative entropy s_A of the work samples of lambda_B in the ",
3582 "ensemble of lambda_A (and vice versa for s_B), is a ",
3583 "measure of the 'distance' between Boltzmann distributions of ",
3584 "the two states, that goes to zero for identical distributions. See ",
3585 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3587 "The estimate of the expected per-sample standard deviation, as given ",
3588 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3589 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3590 "of the actual statistical error, because it assumes independent samples).[PAR]",
3592 "To get a visual estimate of the phase space overlap, use the ",
3593 "[TT]-oh[tt] option to write series of histograms, together with the ",
3594 "[TT]-nbin[tt] option.[PAR]"
3596 static real begin = 0, end = -1, temp = -1;
3597 int nd = 2, nbmin = 5, nbmax = 5;
3599 gmx_bool use_dhdl = FALSE;
3600 gmx_bool calc_s, calc_v;
3602 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3603 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3604 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3605 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3606 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3607 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3608 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3609 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3613 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3614 { efEDR, "-g", "ener", ffOPTRDMULT },
3615 { efXVG, "-o", "bar", ffOPTWR },
3616 { efXVG, "-oi", "barint", ffOPTWR },
3617 { efXVG, "-oh", "histogram", ffOPTWR }
3619 #define NFILE asize(fnm)
3622 int nf = 0; /* file counter */
3624 int nfile_tot; /* total number of input files */
3629 sim_data_t sim_data; /* the simulation data */
3630 barres_t *results; /* the results */
3631 int nresults; /* number of results in results array */
3634 double prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3636 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3637 char buf[STRLEN], buf2[STRLEN];
3638 char ktformat[STRLEN], sktformat[STRLEN];
3639 char kteformat[STRLEN], skteformat[STRLEN];
3642 gmx_bool result_OK = TRUE, bEE = TRUE;
3644 gmx_bool disc_err = FALSE;
3645 double sum_disc_err = 0.; /* discretization error */
3646 gmx_bool histrange_err = FALSE;
3647 double sum_histrange_err = 0.; /* histogram range error */
3648 double stat_err = 0.; /* statistical error */
3650 if (!parse_common_args(&argc, argv,
3652 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3657 if (opt2bSet("-f", NFILE, fnm))
3659 nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3661 if (opt2bSet("-g", NFILE, fnm))
3663 nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3666 sim_data_init(&sim_data);
3668 /* make linked list */
3670 lambda_data_init(lb, 0, 0);
3676 nfile_tot = nxvgfile + nedrfile;
3680 gmx_fatal(FARGS, "No input files!");
3685 gmx_fatal(FARGS, "Can not have negative number of digits");
3687 prec = pow(10, -nd);
3689 snew(partsum, (nbmax+1)*(nbmax+1));
3692 /* read in all files. First xvg files */
3693 for (f = 0; f < nxvgfile; f++)
3695 read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3698 /* then .edr files */
3699 for (f = 0; f < nedrfile; f++)
3701 read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3705 /* fix the times to allow for equilibration */
3706 sim_data_impose_times(&sim_data, begin, end);
3708 if (opt2bSet("-oh", NFILE, fnm))
3710 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3713 /* assemble the output structures from the lambdas */
3714 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3716 sum_disc_err = barres_list_max_disc_err(results, nresults);
3720 printf("\nNo results to calculate.\n");
3724 if (sum_disc_err > prec)
3726 prec = sum_disc_err;
3727 nd = ceil(-log10(prec));
3728 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3732 /*sprintf(lamformat,"%%6.3f");*/
3733 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3734 /* the format strings of the results in kT */
3735 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3736 sprintf( sktformat, "%%%ds", 6+nd);
3737 /* the format strings of the errors in kT */
3738 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3739 sprintf( skteformat, "%%%ds", 4+nd);
3740 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3741 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3746 if (opt2bSet("-o", NFILE, fnm))
3748 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3749 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3750 "\\lambda", buf, exvggtXYDY, oenv);
3754 if (opt2bSet("-oi", NFILE, fnm))
3756 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3757 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3758 "\\lambda", buf, oenv);
3768 /* first calculate results */
3771 for (f = 0; f < nresults; f++)
3773 /* Determine the free energy difference with a factor of 10
3774 * more accuracy than requested for printing.
3776 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3779 if (results[f].dg_disc_err > prec/10.)
3783 if (results[f].dg_histrange_err > prec/10.)
3785 histrange_err = TRUE;
3789 /* print results in kT */
3793 printf("\nTemperature: %g K\n", temp);
3795 printf("\nDetailed results in kT (see help for explanation):\n\n");
3796 printf("%6s ", " lam_A");
3797 printf("%6s ", " lam_B");
3798 printf(sktformat, "DG ");
3801 printf(skteformat, "+/- ");
3805 printf(skteformat, "disc ");
3809 printf(skteformat, "range ");
3811 printf(sktformat, "s_A ");
3814 printf(skteformat, "+/- " );
3816 printf(sktformat, "s_B ");
3819 printf(skteformat, "+/- " );
3821 printf(sktformat, "stdev ");
3824 printf(skteformat, "+/- ");
3827 for (f = 0; f < nresults; f++)
3829 lambda_vec_print_short(results[f].a->native_lambda, buf);
3831 lambda_vec_print_short(results[f].b->native_lambda, buf);
3833 printf(ktformat, results[f].dg);
3837 printf(kteformat, results[f].dg_err);
3842 printf(kteformat, results[f].dg_disc_err);
3847 printf(kteformat, results[f].dg_histrange_err);
3850 printf(ktformat, results[f].sa);
3854 printf(kteformat, results[f].sa_err);
3857 printf(ktformat, results[f].sb);
3861 printf(kteformat, results[f].sb_err);
3864 printf(ktformat, results[f].dg_stddev);
3868 printf(kteformat, results[f].dg_stddev_err);
3872 /* Check for negative relative entropy with a 95% certainty. */
3873 if (results[f].sa < -2*results[f].sa_err ||
3874 results[f].sb < -2*results[f].sb_err)
3882 printf("\nWARNING: Some of these results violate the Second Law of "
3883 "Thermodynamics: \n"
3884 " This is can be the result of severe undersampling, or "
3886 " there is something wrong with the simulations.\n");
3890 /* final results in kJ/mol */
3891 printf("\n\nFinal results in kJ/mol:\n\n");
3893 for (f = 0; f < nresults; f++)
3898 lambda_vec_print_short(results[f].a->native_lambda, buf);
3899 fprintf(fpi, xvg2format, buf, dg_tot);
3905 lambda_vec_print_intermediate(results[f].a->native_lambda,
3906 results[f].b->native_lambda,
3909 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3913 lambda_vec_print_short(results[f].a->native_lambda, buf);
3914 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3915 printf("%s - %s", buf, buf2);
3918 printf(dgformat, results[f].dg*kT);
3922 printf(dgformat, results[f].dg_err*kT);
3926 printf(" (max. range err. = ");
3927 printf(dgformat, results[f].dg_histrange_err*kT);
3929 sum_histrange_err += results[f].dg_histrange_err*kT;
3933 dg_tot += results[f].dg;
3937 lambda_vec_print_short(results[0].a->native_lambda, buf);
3938 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3939 printf("%s - %s", buf, buf2);
3942 printf(dgformat, dg_tot*kT);
3945 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3947 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3952 printf("\nmaximum discretization error = ");
3953 printf(dgformat, sum_disc_err);
3954 if (bEE && stat_err < sum_disc_err)
3956 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3961 printf("\nmaximum histogram range error = ");
3962 printf(dgformat, sum_histrange_err);
3963 if (bEE && stat_err < sum_histrange_err)
3965 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3974 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3975 fprintf(fpi, xvg2format, buf, dg_tot);
3979 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3980 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");