2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/fileio/futil.h"
49 #include "gromacs/commandline/pargs.h"
51 #include "gromacs/fileio/enxio.h"
53 #include "gmx_fatal.h"
56 #include "gromacs/math/utilities.h"
62 /* Structure for the names of lambda vector components */
63 typedef struct lambda_components_t
65 char **names; /* Array of strings with names for the lambda vector
67 int N; /* The number of components */
68 int Nalloc; /* The number of allocated components */
69 } lambda_components_t;
71 /* Structure for a lambda vector or a dhdl derivative direction */
72 typedef struct lambda_vec_t
74 double *val; /* The lambda vector component values. Only valid if
76 int dhdl; /* The coordinate index for the derivative described by this
78 const lambda_components_t *lc; /* the associated lambda_components
80 int index; /* The state number (init-lambda-state) of this lambda
81 vector, if known. If not, it is set to -1 */
84 /* the dhdl.xvg data from a simulation */
88 int ftp; /* file type */
89 int nset; /* number of lambdas, including dhdl */
90 int *np; /* number of data points (du or hists) per lambda */
91 int np_alloc; /* number of points (du or hists) allocated */
92 double temp; /* temperature */
93 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
94 double *t; /* the times (of second index for y) */
95 double **y; /* the dU values. y[0] holds the derivative, while
96 further ones contain the energy differences between
97 the native lambda and the 'foreign' lambdas. */
98 lambda_vec_t native_lambda; /* the native lambda */
100 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
104 typedef struct hist_t
106 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
107 double dx[2]; /* the histogram spacing. The reverse
108 dx is the negative of the forward dx.*/
109 gmx_int64_t x0[2]; /* the (forward + reverse) histogram start
112 int nbin[2]; /* the (forward+reverse) number of bins */
113 gmx_int64_t sum; /* the total number of counts. Must be
114 the same for forward + reverse. */
115 int nhist; /* number of hist datas (forward or reverse) */
117 double start_time, delta_time; /* start time, end time of histogram */
121 /* an aggregate of samples for partial free energy calculation */
122 typedef struct samples_t
124 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
125 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
126 double temp; /* the temperature */
127 gmx_bool derivative; /* whether this sample is a derivative */
129 /* The samples come either as either delta U lists: */
130 int ndu; /* the number of delta U samples */
131 double *du; /* the delta u's */
132 double *t; /* the times associated with those samples, or: */
133 double start_time, delta_time; /*start time and delta time for linear time*/
135 /* or as histograms: */
136 hist_t *hist; /* a histogram */
138 /* allocation data: (not NULL for data 'owned' by this struct) */
139 double *du_alloc, *t_alloc; /* allocated delta u arrays */
140 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
141 hist_t *hist_alloc; /* allocated hist */
143 gmx_int64_t ntot; /* total number of samples */
144 const char *filename; /* the file name this sample comes from */
147 /* a sample range (start to end for du-style data, or boolean
148 for both du-style data and histograms */
149 typedef struct sample_range_t
151 int start, end; /* start and end index for du style data */
152 gmx_bool use; /* whether to use this sample */
154 samples_t *s; /* the samples this range belongs to */
158 /* a collection of samples for a partial free energy calculation
159 (i.e. the collection of samples from one native lambda to one
161 typedef struct sample_coll_t
163 lambda_vec_t *native_lambda; /* these should be the same for all samples
165 lambda_vec_t *foreign_lambda; /* collection */
166 double temp; /* the temperature */
168 int nsamples; /* the number of samples */
169 samples_t **s; /* the samples themselves */
170 sample_range_t *r; /* the sample ranges */
171 int nsamples_alloc; /* number of allocated samples */
173 gmx_int64_t ntot; /* total number of samples in the ranges of
176 struct sample_coll_t *next, *prev; /* next and previous in the list */
179 /* all the samples associated with a lambda point */
180 typedef struct lambda_data_t
182 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
183 double temp; /* temperature */
185 sample_coll_t *sc; /* the samples */
187 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
189 struct lambda_data_t *next, *prev; /* the next and prev in the list */
192 /* Top-level data structure of simulation data */
193 typedef struct sim_data_t
195 lambda_data_t *lb; /* a lambda data linked list */
196 lambda_data_t lb_head; /* The head element of the linked list */
198 lambda_components_t lc; /* the allowed components of the lambda
202 /* Top-level data structure with calculated values. */
204 sample_coll_t *a, *b; /* the simulation data */
206 double dg; /* the free energy difference */
207 double dg_err; /* the free energy difference */
209 double dg_disc_err; /* discretization error */
210 double dg_histrange_err; /* histogram range error */
212 double sa; /* relative entropy of b in state a */
213 double sa_err; /* error in sa */
214 double sb; /* relative entropy of a in state b */
215 double sb_err; /* error in sb */
217 double dg_stddev; /* expected dg stddev per sample */
218 double dg_stddev_err; /* error in dg_stddev */
222 /* Initialize a lambda_components structure */
223 static void lambda_components_init(lambda_components_t *lc)
227 snew(lc->names, lc->Nalloc);
230 /* Add a component to a lambda_components structure */
231 static void lambda_components_add(lambda_components_t *lc,
232 const char *name, size_t name_length)
234 while (lc->N + 1 > lc->Nalloc)
236 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
237 srealloc( lc->names, lc->Nalloc );
239 snew(lc->names[lc->N], name_length+1);
240 strncpy(lc->names[lc->N], name, name_length);
244 /* check whether a component with index 'index' matches the given name, or
245 is also NULL. Returns TRUE if this is the case.
246 the string name does not need to end */
247 static gmx_bool lambda_components_check(const lambda_components_t *lc,
257 if (name == NULL && lc->names[index] == NULL)
261 if ((name == NULL) != (lc->names[index] == NULL))
265 len = strlen(lc->names[index]);
266 if (len != name_length)
270 if (strncmp(lc->names[index], name, name_length) == 0)
277 /* Find the index of a given lambda component name, or -1 if not found */
278 static int lambda_components_find(const lambda_components_t *lc,
284 for (i = 0; i < lc->N; i++)
286 if (strncmp(lc->names[i], name, name_length) == 0)
296 /* initialize a lambda vector */
297 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
299 snew(lv->val, lc->N);
305 static void lambda_vec_destroy(lambda_vec_t *lv)
310 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
314 lambda_vec_init(lv, orig->lc);
315 lv->dhdl = orig->dhdl;
316 lv->index = orig->index;
317 for (i = 0; i < lv->lc->N; i++)
319 lv->val[i] = orig->val[i];
323 /* write a lambda vec to a preallocated string */
324 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
329 str[0] = 0; /* reset the string */
334 str += sprintf(str, "delta H to ");
338 str += sprintf(str, "(");
340 for (i = 0; i < lv->lc->N; i++)
342 str += sprintf(str, "%g", lv->val[i]);
345 str += sprintf(str, ", ");
350 str += sprintf(str, ")");
355 /* this lambda vector describes a derivative */
356 str += sprintf(str, "dH/dl");
357 if (strlen(lv->lc->names[lv->dhdl]) > 0)
359 str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
364 /* write a shortened version of the lambda vec to a preallocated string */
365 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
372 sprintf(str, "%6d", lv->index);
378 sprintf(str, "%6.3f", lv->val[0]);
382 sprintf(str, "dH/dl[%d]", lv->dhdl);
387 /* write an intermediate version of two lambda vecs to a preallocated string */
388 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
389 const lambda_vec_t *b, char *str)
395 if ( (a->index >= 0) && (b->index >= 0) )
397 sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
401 if ( (a->dhdl < 0) && (b->dhdl < 0) )
403 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
410 /* calculate the difference in lambda vectors: c = a-b.
411 c must be initialized already, and a and b must describe non-derivative
413 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
418 if ( (a->dhdl > 0) || (b->dhdl > 0) )
421 "Trying to calculate the difference between derivatives instead of lambda points");
423 if ((a->lc != b->lc) || (a->lc != c->lc) )
426 "Trying to calculate the difference lambdas with differing basis set");
428 for (i = 0; i < a->lc->N; i++)
430 c->val[i] = a->val[i] - b->val[i];
434 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
435 a and b must describe non-derivative lambda points */
436 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
441 if ( (a->dhdl > 0) || (b->dhdl > 0) )
444 "Trying to calculate the difference between derivatives instead of lambda points");
449 "Trying to calculate the difference lambdas with differing basis set");
451 for (i = 0; i < a->lc->N; i++)
453 double df = a->val[i] - b->val[i];
460 /* check whether two lambda vectors are the same */
461 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
471 for (i = 0; i < a->lc->N; i++)
473 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
482 /* they're derivatives, so we check whether the indices match */
483 return (a->dhdl == b->dhdl);
487 /* Compare the sort order of two foreign lambda vectors
489 returns 1 if a is 'bigger' than b,
490 returns 0 if they're the same,
491 returns -1 if a is 'smaller' than b.*/
492 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
493 const lambda_vec_t *b)
496 double norm_a = 0, norm_b = 0;
497 gmx_bool different = FALSE;
501 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
503 /* if either one has an index we sort based on that */
504 if ((a->index >= 0) || (b->index >= 0))
506 if (a->index == b->index)
510 return (a->index > b->index) ? 1 : -1;
512 if (a->dhdl >= 0 || b->dhdl >= 0)
514 /* lambda vectors that are derivatives always sort higher than those
515 without derivatives */
516 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
518 return (a->dhdl >= 0) ? 1 : -1;
520 return a->dhdl > b->dhdl;
523 /* neither has an index, so we can only sort on the lambda components,
524 which is only valid if there is one component */
525 for (i = 0; i < a->lc->N; i++)
527 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
531 norm_a += a->val[i]*a->val[i];
532 norm_b += b->val[i]*b->val[i];
538 return norm_a > norm_b;
541 /* Compare the sort order of two native lambda vectors
543 returns 1 if a is 'bigger' than b,
544 returns 0 if they're the same,
545 returns -1 if a is 'smaller' than b.*/
546 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
547 const lambda_vec_t *b)
553 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
555 /* if either one has an index we sort based on that */
556 if ((a->index >= 0) || (b->index >= 0))
558 if (a->index == b->index)
562 return (a->index > b->index) ? 1 : -1;
564 /* neither has an index, so we can only sort on the lambda components,
565 which is only valid if there is one component */
569 "Can't compare lambdas with no index and > 1 component");
571 if (a->dhdl >= 0 || b->dhdl >= 0)
574 "Can't compare native lambdas that are derivatives");
576 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
580 return a->val[0] > b->val[0] ? 1 : -1;
586 static void hist_init(hist_t *h, int nhist, int *nbin)
591 gmx_fatal(FARGS, "histogram with more than two sets of data!");
593 for (i = 0; i < nhist; i++)
595 snew(h->bin[i], nbin[i]);
597 h->nbin[i] = nbin[i];
598 h->start_time = h->delta_time = 0;
605 static void hist_destroy(hist_t *h)
611 static void xvg_init(xvg_t *ba)
620 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
621 lambda_vec_t *foreign_lambda, double temp,
622 gmx_bool derivative, const char *filename)
624 s->native_lambda = native_lambda;
625 s->foreign_lambda = foreign_lambda;
627 s->derivative = derivative;
632 s->start_time = s->delta_time = 0;
636 s->hist_alloc = NULL;
641 s->filename = filename;
644 static void sample_range_init(sample_range_t *r, samples_t *s)
652 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
653 lambda_vec_t *foreign_lambda, double temp)
655 sc->native_lambda = native_lambda;
656 sc->foreign_lambda = foreign_lambda;
662 sc->nsamples_alloc = 0;
665 sc->next = sc->prev = NULL;
668 static void sample_coll_destroy(sample_coll_t *sc)
670 /* don't free the samples themselves */
676 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
679 l->lambda = native_lambda;
685 l->sc = &(l->sc_head);
687 sample_coll_init(l->sc, native_lambda, NULL, 0.);
692 static void barres_init(barres_t *br)
701 br->dg_stddev_err = 0;
708 /* calculate the total number of samples in a sample collection */
709 static void sample_coll_calc_ntot(sample_coll_t *sc)
714 for (i = 0; i < sc->nsamples; i++)
720 sc->ntot += sc->s[i]->ntot;
724 sc->ntot += sc->r[i].end - sc->r[i].start;
731 /* find the barsamples_t associated with a lambda that corresponds to
732 a specific foreign lambda */
733 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
734 lambda_vec_t *foreign_lambda)
736 sample_coll_t *sc = l->sc->next;
740 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
750 /* insert li into an ordered list of lambda_colls */
751 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
753 sample_coll_t *scn = l->sc->next;
754 while ( (scn != l->sc) )
756 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
762 /* now insert it before the found scn */
764 sc->prev = scn->prev;
765 scn->prev->next = sc;
769 /* insert li into an ordered list of lambdas */
770 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
772 lambda_data_t *lc = head->next;
775 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
781 /* now insert ourselves before the found lc */
788 /* insert a sample and a sample_range into a sample_coll. The
789 samples are stored as a pointer, the range is copied. */
790 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
793 /* first check if it belongs here */
794 if (sc->temp != s->temp)
796 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
797 s->filename, sc->next->s[0]->filename);
799 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
801 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
802 s->filename, sc->next->s[0]->filename);
804 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
806 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
807 s->filename, sc->next->s[0]->filename);
810 /* check if there's room */
811 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
813 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
814 srenew(sc->s, sc->nsamples_alloc);
815 srenew(sc->r, sc->nsamples_alloc);
817 sc->s[sc->nsamples] = s;
818 sc->r[sc->nsamples] = *r;
821 sample_coll_calc_ntot(sc);
824 /* insert a sample into a lambda_list, creating the right sample_coll if
826 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
828 gmx_bool found = FALSE;
832 lambda_data_t *l = head->next;
834 /* first search for the right lambda_data_t */
837 if (lambda_vec_same(l->lambda, s->native_lambda) )
847 snew(l, 1); /* allocate a new one */
848 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
849 lambda_data_insert_lambda(head, l); /* add it to the list */
852 /* now look for a sample collection */
853 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
856 snew(sc, 1); /* allocate a new one */
857 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
858 lambda_data_insert_sample_coll(l, sc);
861 /* now insert the samples into the sample coll */
862 sample_range_init(&r, s);
863 sample_coll_insert_sample(sc, s, &r);
867 /* make a histogram out of a sample collection */
868 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
869 int *nbin_alloc, int *nbin,
870 double *dx, double *xmin, int nbin_default)
873 gmx_bool dx_set = FALSE;
874 gmx_bool xmin_set = FALSE;
876 gmx_bool xmax_set = FALSE;
877 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
878 limits of a histogram */
881 /* first determine dx and xmin; try the histograms */
882 for (i = 0; i < sc->nsamples; i++)
886 hist_t *hist = sc->s[i]->hist;
887 for (k = 0; k < hist->nhist; k++)
889 double hdx = hist->dx[k];
890 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
892 /* we use the biggest dx*/
893 if ( (!dx_set) || hist->dx[0] > *dx)
898 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
901 *xmin = (hist->x0[k]*hdx);
904 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
908 if (hist->bin[k][hist->nbin[k]-1] != 0)
910 xmax_set_hard = TRUE;
913 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
915 xmax_set_hard = TRUE;
921 /* and the delta us */
922 for (i = 0; i < sc->nsamples; i++)
924 if (sc->s[i]->ndu > 0)
926 /* determine min and max */
927 int starti = sc->r[i].start;
928 int endi = sc->r[i].end;
929 double du_xmin = sc->s[i]->du[starti];
930 double du_xmax = sc->s[i]->du[starti];
931 for (j = starti+1; j < endi; j++)
933 if (sc->s[i]->du[j] < du_xmin)
935 du_xmin = sc->s[i]->du[j];
937 if (sc->s[i]->du[j] > du_xmax)
939 du_xmax = sc->s[i]->du[j];
943 /* and now change the limits */
944 if ( (!xmin_set) || (du_xmin < *xmin) )
949 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
957 if (!xmax_set || !xmin_set)
966 *nbin = nbin_default;
967 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
968 be 0, and we count from 0 */
972 *nbin = (xmax-(*xmin))/(*dx);
975 if (*nbin > *nbin_alloc)
978 srenew(*bin, *nbin_alloc);
981 /* reset the histogram */
982 for (i = 0; i < (*nbin); i++)
987 /* now add the actual data */
988 for (i = 0; i < sc->nsamples; i++)
992 hist_t *hist = sc->s[i]->hist;
993 for (k = 0; k < hist->nhist; k++)
995 double hdx = hist->dx[k];
996 double xmin_hist = hist->x0[k]*hdx;
997 for (j = 0; j < hist->nbin[k]; j++)
999 /* calculate the bin corresponding to the middle of the
1001 double x = hdx*(j+0.5) + xmin_hist;
1002 int binnr = (int)((x-(*xmin))/(*dx));
1004 if (binnr >= *nbin || binnr < 0)
1009 (*bin)[binnr] += hist->bin[k][j];
1015 int starti = sc->r[i].start;
1016 int endi = sc->r[i].end;
1017 for (j = starti; j < endi; j++)
1019 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1020 if (binnr >= *nbin || binnr < 0)
1031 /* write a collection of histograms to a file */
1032 void sim_data_histogram(sim_data_t *sd, const char *filename,
1033 int nbin_default, const output_env_t oenv)
1035 char label_x[STRLEN];
1036 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1037 const char *title = "N(\\DeltaH)";
1038 const char *label_y = "Samples";
1042 char **setnames = NULL;
1043 gmx_bool first_set = FALSE;
1044 /* histogram data: */
1051 lambda_data_t *bl_head = sd->lb;
1053 printf("\nWriting histogram to %s\n", filename);
1054 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1056 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1058 /* first get all the set names */
1060 /* iterate over all lambdas */
1061 while (bl != bl_head)
1063 sample_coll_t *sc = bl->sc->next;
1065 /* iterate over all samples */
1066 while (sc != bl->sc)
1068 char buf[STRLEN], buf2[STRLEN];
1071 srenew(setnames, nsets);
1072 snew(setnames[nsets-1], STRLEN);
1073 if (sc->foreign_lambda->dhdl < 0)
1075 lambda_vec_print(sc->native_lambda, buf, FALSE);
1076 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1077 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1078 deltag, lambda, buf2, lambda, buf);
1082 lambda_vec_print(sc->native_lambda, buf, FALSE);
1083 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1091 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1094 /* now make the histograms */
1096 /* iterate over all lambdas */
1097 while (bl != bl_head)
1099 sample_coll_t *sc = bl->sc->next;
1101 /* iterate over all samples */
1102 while (sc != bl->sc)
1106 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1109 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1112 for (i = 0; i < nbin; i++)
1114 double xmin = i*dx + min;
1115 double xmax = (i+1)*dx + min;
1117 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1135 /* create a collection (array) of barres_t object given a ordered linked list
1136 of barlamda_t sample collections */
1137 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1144 gmx_bool dhdl = FALSE;
1145 gmx_bool first = TRUE;
1146 lambda_data_t *bl_head = sd->lb;
1148 /* first count the lambdas */
1150 while (bl != bl_head)
1155 snew(res, nlambda-1);
1157 /* next put the right samples in the res */
1159 bl = bl_head->next->next; /* we start with the second one. */
1160 while (bl != bl_head)
1162 sample_coll_t *sc, *scprev;
1163 barres_t *br = &(res[*nres]);
1164 /* there is always a previous one. we search for that as a foreign
1166 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1167 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1175 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1176 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1180 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1185 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1188 else if (!scprev && !sc)
1190 gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1193 /* normal delta H */
1196 gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1200 gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1212 /* estimate the maximum discretization error */
1213 static double barres_list_max_disc_err(barres_t *res, int nres)
1216 double disc_err = 0.;
1217 double delta_lambda;
1219 for (i = 0; i < nres; i++)
1221 barres_t *br = &(res[i]);
1223 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1224 br->a->native_lambda);
1226 for (j = 0; j < br->a->nsamples; j++)
1228 if (br->a->s[j]->hist)
1231 if (br->a->s[j]->derivative)
1233 Wfac = delta_lambda;
1236 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1239 for (j = 0; j < br->b->nsamples; j++)
1241 if (br->b->s[j]->hist)
1244 if (br->b->s[j]->derivative)
1246 Wfac = delta_lambda;
1248 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1256 /* impose start and end times on a sample collection, updating sample_ranges */
1257 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1261 for (i = 0; i < sc->nsamples; i++)
1263 samples_t *s = sc->s[i];
1264 sample_range_t *r = &(sc->r[i]);
1267 double end_time = s->hist->delta_time*s->hist->sum +
1268 s->hist->start_time;
1269 if (s->hist->start_time < begin_t || end_time > end_t)
1279 if (s->start_time < begin_t)
1281 r->start = (int)((begin_t - s->start_time)/s->delta_time);
1283 end_time = s->delta_time*s->ndu + s->start_time;
1284 if (end_time > end_t)
1286 r->end = (int)((end_t - s->start_time)/s->delta_time);
1292 for (j = 0; j < s->ndu; j++)
1294 if (s->t[j] < begin_t)
1299 if (s->t[j] >= end_t)
1306 if (r->start > r->end)
1312 sample_coll_calc_ntot(sc);
1315 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1317 double first_t, last_t;
1318 double begin_t, end_t;
1320 lambda_data_t *head = sd->lb;
1323 if (begin <= 0 && end < 0)
1328 /* first determine the global start and end times */
1334 sample_coll_t *sc = lc->sc->next;
1335 while (sc != lc->sc)
1337 for (j = 0; j < sc->nsamples; j++)
1339 double start_t, end_t;
1341 start_t = sc->s[j]->start_time;
1342 end_t = sc->s[j]->start_time;
1345 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1351 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1355 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1359 if (start_t < first_t || first_t < 0)
1373 /* calculate the actual times */
1391 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1393 if (begin_t > end_t)
1397 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1399 /* then impose them */
1403 sample_coll_t *sc = lc->sc->next;
1404 while (sc != lc->sc)
1406 sample_coll_impose_times(sc, begin_t, end_t);
1414 /* create subsample i out of ni from an existing sample_coll */
1415 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1416 sample_coll_t *sc_orig,
1420 int hist_start, hist_end;
1422 gmx_int64_t ntot_start;
1423 gmx_int64_t ntot_end;
1424 gmx_int64_t ntot_so_far;
1426 *sc = *sc_orig; /* just copy all fields */
1428 /* allocate proprietary memory */
1429 snew(sc->s, sc_orig->nsamples);
1430 snew(sc->r, sc_orig->nsamples);
1432 /* copy the samples */
1433 for (j = 0; j < sc_orig->nsamples; j++)
1435 sc->s[j] = sc_orig->s[j];
1436 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1439 /* now fix start and end fields */
1440 /* the casts avoid possible overflows */
1441 ntot_start = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1442 ntot_end = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1444 for (j = 0; j < sc->nsamples; j++)
1446 gmx_int64_t ntot_add;
1447 gmx_int64_t new_start, new_end;
1453 ntot_add = sc->s[j]->hist->sum;
1457 ntot_add = sc->r[j].end - sc->r[j].start;
1465 if (!sc->s[j]->hist)
1467 if (ntot_so_far < ntot_start)
1469 /* adjust starting point */
1470 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1474 new_start = sc->r[j].start;
1476 /* adjust end point */
1477 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1478 if (new_end > sc->r[j].end)
1480 new_end = sc->r[j].end;
1483 /* check if we're in range at all */
1484 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1489 /* and write the new range */
1490 sc->r[j].start = (int)new_start;
1491 sc->r[j].end = (int)new_end;
1498 double ntot_start_norm, ntot_end_norm;
1499 /* calculate the amount of overlap of the
1500 desired range (ntot_start -- ntot_end) onto
1501 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1503 /* first calculate normalized bounds
1504 (where 0 is the start of the hist range, and 1 the end) */
1505 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1506 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1508 /* now fix the boundaries */
1509 ntot_start_norm = min(1, max(0., ntot_start_norm));
1510 ntot_end_norm = max(0, min(1., ntot_end_norm));
1512 /* and calculate the overlap */
1513 overlap = ntot_end_norm - ntot_start_norm;
1515 if (overlap > 0.95) /* we allow for 5% slack */
1517 sc->r[j].use = TRUE;
1519 else if (overlap < 0.05)
1521 sc->r[j].use = FALSE;
1529 ntot_so_far += ntot_add;
1531 sample_coll_calc_ntot(sc);
1536 /* calculate minimum and maximum work values in sample collection */
1537 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1538 double *Wmin, double *Wmax)
1545 for (i = 0; i < sc->nsamples; i++)
1547 samples_t *s = sc->s[i];
1548 sample_range_t *r = &(sc->r[i]);
1553 for (j = r->start; j < r->end; j++)
1555 *Wmin = min(*Wmin, s->du[j]*Wfac);
1556 *Wmax = max(*Wmax, s->du[j]*Wfac);
1561 int hd = 0; /* determine the histogram direction: */
1563 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1567 dx = s->hist->dx[hd];
1569 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1571 *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1572 *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1573 /* look for the highest value bin with values */
1574 if (s->hist->bin[hd][j] > 0)
1576 *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1577 *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1586 /* Initialize a sim_data structure */
1587 static void sim_data_init(sim_data_t *sd)
1589 /* make linked list */
1590 sd->lb = &(sd->lb_head);
1591 sd->lb->next = sd->lb;
1592 sd->lb->prev = sd->lb;
1594 lambda_components_init(&(sd->lc));
1598 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1605 for (i = 0; i < n; i++)
1607 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1613 /* calculate the BAR average given a histogram
1615 if type== 0, calculate the best estimate for the average,
1616 if type==-1, calculate the minimum possible value given the histogram
1617 if type== 1, calculate the maximum possible value given the histogram */
1618 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1624 /* normalization factor multiplied with bin width and
1625 number of samples (we normalize through M): */
1627 int hd = 0; /* determine the histogram direction: */
1630 if ( (hist->nhist > 1) && (Wfac < 0) )
1635 max = hist->nbin[hd]-1;
1638 max = hist->nbin[hd]; /* we also add whatever was out of range */
1641 for (i = 0; i < max; i++)
1643 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1644 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1646 sum += pxdx/(1. + exp(x + sbMmDG));
1652 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1653 double temp, double tol, int type)
1658 double Wfac1, Wfac2, Wmin, Wmax;
1659 double DG0, DG1, DG2, dDG1;
1661 double n1, n2; /* numbers of samples as doubles */
1666 /* count the numbers of samples */
1672 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1673 if (ca->foreign_lambda->dhdl < 0)
1675 /* this is the case when the delta U were calculated directly
1676 (i.e. we're not scaling dhdl) */
1682 /* we're using dhdl, so delta_lambda needs to be a
1683 multiplication factor. */
1684 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1685 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1687 if (cb->native_lambda->lc->N > 1)
1690 "Can't (yet) do multi-component dhdl interpolation");
1693 Wfac1 = beta*delta_lambda;
1694 Wfac2 = -beta*delta_lambda;
1699 /* We print the output both in kT and kJ/mol.
1700 * Here we determine DG in kT, so when beta < 1
1701 * the precision has to be increased.
1706 /* Calculate minimum and maximum work to give an initial estimate of
1707 * delta G as their average.
1710 double Wmin1, Wmin2, Wmax1, Wmax2;
1711 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1712 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1714 Wmin = min(Wmin1, Wmin2);
1715 Wmax = max(Wmax1, Wmax2);
1723 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1725 /* We approximate by bisection: given our initial estimates
1726 we keep checking whether the halfway point is greater or
1727 smaller than what we get out of the BAR averages.
1729 For the comparison we can use twice the tolerance. */
1730 while (DG2 - DG0 > 2*tol)
1732 DG1 = 0.5*(DG0 + DG2);
1734 /* calculate the BAR averages */
1737 for (i = 0; i < ca->nsamples; i++)
1739 samples_t *s = ca->s[i];
1740 sample_range_t *r = &(ca->r[i]);
1745 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1749 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1754 for (i = 0; i < cb->nsamples; i++)
1756 samples_t *s = cb->s[i];
1757 sample_range_t *r = &(cb->r[i]);
1762 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1766 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1782 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1786 return 0.5*(DG0 + DG2);
1789 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1790 double temp, double dg, double *sa, double *sb)
1796 double Wfac1, Wfac2;
1802 /* count the numbers of samples */
1806 /* to ensure the work values are the same as during the delta_G */
1807 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1808 if (ca->foreign_lambda->dhdl < 0)
1810 /* this is the case when the delta U were calculated directly
1811 (i.e. we're not scaling dhdl) */
1817 /* we're using dhdl, so delta_lambda needs to be a
1818 multiplication factor. */
1819 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1821 Wfac1 = beta*delta_lambda;
1822 Wfac2 = -beta*delta_lambda;
1825 /* first calculate the average work in both directions */
1826 for (i = 0; i < ca->nsamples; i++)
1828 samples_t *s = ca->s[i];
1829 sample_range_t *r = &(ca->r[i]);
1834 for (j = r->start; j < r->end; j++)
1836 W_ab += Wfac1*s->du[j];
1841 /* normalization factor multiplied with bin width and
1842 number of samples (we normalize through M): */
1845 int hd = 0; /* histogram direction */
1846 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1850 dx = s->hist->dx[hd];
1852 for (j = 0; j < s->hist->nbin[0]; j++)
1854 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1855 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1863 for (i = 0; i < cb->nsamples; i++)
1865 samples_t *s = cb->s[i];
1866 sample_range_t *r = &(cb->r[i]);
1871 for (j = r->start; j < r->end; j++)
1873 W_ba += Wfac1*s->du[j];
1878 /* normalization factor multiplied with bin width and
1879 number of samples (we normalize through M): */
1882 int hd = 0; /* histogram direction */
1883 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1887 dx = s->hist->dx[hd];
1889 for (j = 0; j < s->hist->nbin[0]; j++)
1891 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1892 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1900 /* then calculate the relative entropies */
1905 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1906 double temp, double dg, double *stddev)
1910 double sigmafact = 0.;
1912 double Wfac1, Wfac2;
1918 /* count the numbers of samples */
1922 /* to ensure the work values are the same as during the delta_G */
1923 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1924 if (ca->foreign_lambda->dhdl < 0)
1926 /* this is the case when the delta U were calculated directly
1927 (i.e. we're not scaling dhdl) */
1933 /* we're using dhdl, so delta_lambda needs to be a
1934 multiplication factor. */
1935 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1937 Wfac1 = beta*delta_lambda;
1938 Wfac2 = -beta*delta_lambda;
1944 /* calculate average in both directions */
1945 for (i = 0; i < ca->nsamples; i++)
1947 samples_t *s = ca->s[i];
1948 sample_range_t *r = &(ca->r[i]);
1953 for (j = r->start; j < r->end; j++)
1955 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1960 /* normalization factor multiplied with bin width and
1961 number of samples (we normalize through M): */
1964 int hd = 0; /* histogram direction */
1965 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1969 dx = s->hist->dx[hd];
1971 for (j = 0; j < s->hist->nbin[0]; j++)
1973 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1974 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1976 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1981 for (i = 0; i < cb->nsamples; i++)
1983 samples_t *s = cb->s[i];
1984 sample_range_t *r = &(cb->r[i]);
1989 for (j = r->start; j < r->end; j++)
1991 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1996 /* normalization factor multiplied with bin width and
1997 number of samples (we normalize through M): */
2000 int hd = 0; /* histogram direction */
2001 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2005 dx = s->hist->dx[hd];
2007 for (j = 0; j < s->hist->nbin[0]; j++)
2009 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2010 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2012 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2018 sigmafact /= (n1 + n2);
2022 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2023 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2028 static void calc_bar(barres_t *br, double tol,
2029 int npee_min, int npee_max, gmx_bool *bEE,
2033 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2034 for calculated quantities */
2035 int nsample1, nsample2;
2036 double temp = br->a->temp;
2038 double dg_min, dg_max;
2039 gmx_bool have_hist = FALSE;
2041 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2043 br->dg_disc_err = 0.;
2044 br->dg_histrange_err = 0.;
2046 /* check if there are histograms */
2047 for (i = 0; i < br->a->nsamples; i++)
2049 if (br->a->r[i].use && br->a->s[i]->hist)
2057 for (i = 0; i < br->b->nsamples; i++)
2059 if (br->b->r[i].use && br->b->s[i]->hist)
2067 /* calculate histogram-specific errors */
2070 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2071 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2073 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2075 /* the histogram range error is the biggest of the differences
2076 between the best estimate and the extremes */
2077 br->dg_histrange_err = fabs(dg_max - dg_min);
2079 br->dg_disc_err = 0.;
2080 for (i = 0; i < br->a->nsamples; i++)
2082 if (br->a->s[i]->hist)
2084 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2087 for (i = 0; i < br->b->nsamples; i++)
2089 if (br->b->s[i]->hist)
2091 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2095 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2097 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2106 sample_coll_t ca, cb;
2108 /* initialize the samples */
2109 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2111 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2114 for (npee = npee_min; npee <= npee_max; npee++)
2123 double dstddev2 = 0;
2126 for (p = 0; p < npee; p++)
2133 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2134 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2138 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2142 sample_coll_destroy(&ca);
2146 sample_coll_destroy(&cb);
2151 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2155 partsum[npee*(npee_max+1)+p] += dgp;
2157 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2162 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2165 dstddev2 += stddevc*stddevc;
2167 sample_coll_destroy(&ca);
2168 sample_coll_destroy(&cb);
2172 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2178 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2179 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2183 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2185 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
2186 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
2187 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
2188 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2193 static double bar_err(int nbmin, int nbmax, const double *partsum)
2196 double svar, s, s2, dg;
2199 for (nb = nbmin; nb <= nbmax; nb++)
2203 for (b = 0; b < nb; b++)
2205 dg = partsum[nb*(nbmax+1)+b];
2211 svar += (s2 - s*s)/(nb - 1);
2214 return sqrt(svar/(nbmax + 1 - nbmin));
2218 /* Seek the end of an identifier (consecutive non-spaces), followed by
2219 an optional number of spaces or '='-signs. Returns a pointer to the
2220 first non-space value found after that. Returns NULL if the string
2223 static const char *find_value(const char *str)
2225 gmx_bool name_end_found = FALSE;
2227 /* if the string is a NULL pointer, return a NULL pointer. */
2232 while (*str != '\0')
2234 /* first find the end of the name */
2235 if (!name_end_found)
2237 if (isspace(*str) || (*str == '=') )
2239 name_end_found = TRUE;
2244 if (!( isspace(*str) || (*str == '=') ))
2256 /* read a vector-notation description of a lambda vector */
2257 static gmx_bool read_lambda_compvec(const char *str,
2259 const lambda_components_t *lc_in,
2260 lambda_components_t *lc_out,
2264 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2265 components, or to check them */
2266 gmx_bool start_reached = FALSE; /* whether the start of component names
2268 gmx_bool vector = FALSE; /* whether there are multiple components */
2269 int n = 0; /* current component number */
2270 const char *val_start = NULL; /* start of the component name, or NULL
2271 if not in a value */
2281 if (lc_out && lc_out->N == 0)
2283 initialize_lc = TRUE;
2298 start_reached = TRUE;
2301 else if (*str == '(')
2304 start_reached = TRUE;
2306 else if (!isspace(*str))
2308 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2315 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2322 lambda_components_add(lc_out, val_start,
2327 if (!lambda_components_check(lc_out, n, val_start,
2336 /* add a vector component to lv */
2337 lv->val[n] = strtod(val_start, &strtod_end);
2338 if (val_start == strtod_end)
2341 "Error reading lambda vector in %s", fn);
2344 /* reset for the next identifier */
2353 else if (isalnum(*str))
2366 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2374 else if (lv == NULL)
2380 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2400 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2406 /* read and check the component names from a string */
2407 static gmx_bool read_lambda_components(const char *str,
2408 lambda_components_t *lc,
2412 return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2415 /* read an initialized lambda vector from a string */
2416 static gmx_bool read_lambda_vector(const char *str,
2421 return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2426 /* deduce lambda value from legend.
2428 legend = the legend string
2430 lam = the initialized lambda vector
2431 returns whether to use the data in this set.
2433 static gmx_bool legend2lambda(const char *fn,
2438 const char *ptr = NULL, *ptr2 = NULL;
2439 gmx_bool ok = FALSE;
2440 gmx_bool bdhdl = FALSE;
2441 const char *tostr = " to ";
2445 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2448 /* look for the last 'to': */
2452 ptr2 = strstr(ptr2, tostr);
2459 while (ptr2 != NULL && *ptr2 != '\0');
2463 ptr += strlen(tostr)-1; /* and advance past that 'to' */
2467 /* look for the = sign */
2468 ptr = strrchr(legend, '=');
2471 /* otherwise look for the last space */
2472 ptr = strrchr(legend, ' ');
2476 if (strstr(legend, "dH"))
2481 else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2486 else /*if (strstr(legend, "pV"))*/
2497 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2501 ptr = find_value(ptr);
2502 if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2504 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2513 ptr = strrchr(legend, '=');
2517 /* there must be a component name */
2521 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2523 /* now backtrack to the start of the identifier */
2524 while (isspace(*ptr))
2530 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2533 while (!isspace(*ptr))
2538 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2542 strncpy(buf, ptr, (end-ptr));
2543 buf[(end-ptr)] = '\0';
2544 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2548 strncpy(buf, ptr, (end-ptr));
2549 buf[(end-ptr)] = '\0';
2551 "Did not find lambda component for '%s' in %s",
2560 "dhdl without component name with >1 lambda component in %s",
2565 lam->dhdl = dhdl_index;
2570 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2571 lambda_components_t *lc)
2576 double native_lambda;
2580 /* first check for a state string */
2581 ptr = strstr(subtitle, "state");
2585 const char *val_end;
2587 /* the new 4.6 style lambda vectors */
2588 ptr = find_value(ptr);
2591 index = strtol(ptr, &end, 10);
2594 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2601 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2604 /* now find the lambda vector component names */
2605 while (*ptr != '(' && !isalnum(*ptr))
2611 "Incomplete lambda vector component data in %s", fn);
2616 if (!read_lambda_components(ptr, lc, &val_end, fn))
2619 "lambda vector components in %s don't match those previously read",
2622 ptr = find_value(val_end);
2625 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2628 lambda_vec_init(&(ba->native_lambda), lc);
2629 if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2631 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2633 ba->native_lambda.index = index;
2638 /* compatibility mode: check for lambda in other ways. */
2639 /* plain text lambda string */
2640 ptr = strstr(subtitle, "lambda");
2643 /* xmgrace formatted lambda string */
2644 ptr = strstr(subtitle, "\\xl\\f{}");
2648 /* xmgr formatted lambda string */
2649 ptr = strstr(subtitle, "\\8l\\4");
2653 ptr = strstr(ptr, "=");
2657 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2658 /* add the lambda component name as an empty string */
2661 if (!lambda_components_check(lc, 0, "", 0))
2664 "lambda vector components in %s don't match those previously read",
2670 lambda_components_add(lc, "", 0);
2672 lambda_vec_init(&(ba->native_lambda), lc);
2673 ba->native_lambda.val[0] = native_lambda;
2680 static void filename2lambda(const char *fn)
2683 const char *ptr, *digitptr;
2687 /* go to the end of the path string and search backward to find the last
2688 directory in the path which has to contain the value of lambda
2690 while (ptr[1] != '\0')
2694 /* searching backward to find the second directory separator */
2699 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2707 /* save the last position of a digit between the last two
2708 separators = in the last dirname */
2709 if (dirsep > 0 && isdigit(*ptr))
2717 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2718 " last directory in the path '%s' does not contain a number", fn);
2720 if (digitptr[-1] == '-')
2724 lambda = strtod(digitptr, &endptr);
2725 if (endptr == digitptr)
2727 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2731 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2732 lambda_components_t *lc)
2735 char *subtitle, **legend, *ptr;
2737 gmx_bool native_lambda_read = FALSE;
2745 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2748 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2750 /* Reorder the data */
2752 for (i = 1; i < ba->nset; i++)
2754 ba->y[i-1] = ba->y[i];
2758 snew(ba->np, ba->nset);
2759 for (i = 0; i < ba->nset; i++)
2765 if (subtitle != NULL)
2767 /* try to extract temperature */
2768 ptr = strstr(subtitle, "T =");
2772 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2776 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2786 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2791 /* Try to deduce lambda from the subtitle */
2794 if (subtitle2lambda(subtitle, ba, fn, lc))
2796 native_lambda_read = TRUE;
2799 snew(ba->lambda, ba->nset);
2802 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2805 if (!native_lambda_read)
2807 /* Deduce lambda from the file name */
2808 filename2lambda(fn);
2809 native_lambda_read = TRUE;
2811 ba->lambda[0] = ba->native_lambda;
2815 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2820 for (i = 0; i < ba->nset; )
2822 gmx_bool use = FALSE;
2823 /* Read lambda from the legend */
2824 lambda_vec_init( &(ba->lambda[i]), lc );
2825 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2826 use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2829 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2835 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2836 for (j = i+1; j < ba->nset; j++)
2838 ba->y[j-1] = ba->y[j];
2839 legend[j-1] = legend[j];
2846 if (!native_lambda_read)
2848 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2853 for (i = 0; i < ba->nset-1; i++)
2861 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2870 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2872 if (barsim->nset < 1)
2874 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2877 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2879 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2881 *temp = barsim->temp;
2883 /* now create a series of samples_t */
2884 snew(s, barsim->nset);
2885 for (i = 0; i < barsim->nset; i++)
2887 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2888 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2889 &(barsim->lambda[i])),
2891 s[i].du = barsim->y[i];
2892 s[i].ndu = barsim->np[i];
2895 lambda_data_list_insert_sample(sd->lb, s+i);
2900 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2901 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2902 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2903 for (i = 0; i < barsim->nset; i++)
2905 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2906 printf(" %s (%d pts)\n", buf, s[i].ndu);
2912 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2913 double start_time, double delta_time,
2914 lambda_vec_t *native_lambda, double temp,
2915 double *last_t, const char *filename)
2919 double old_foreign_lambda;
2920 lambda_vec_t *foreign_lambda;
2922 samples_t *s; /* convenience pointer */
2925 /* check the block types etc. */
2926 if ( (blk->nsub < 3) ||
2927 (blk->sub[0].type != xdr_datatype_int) ||
2928 (blk->sub[1].type != xdr_datatype_double) ||
2930 (blk->sub[2].type != xdr_datatype_float) &&
2931 (blk->sub[2].type != xdr_datatype_double)
2933 (blk->sub[0].nr < 1) ||
2934 (blk->sub[1].nr < 1) )
2937 "Unexpected/corrupted block data in file %s around time %f.",
2938 filename, start_time);
2941 snew(foreign_lambda, 1);
2942 lambda_vec_init(foreign_lambda, native_lambda->lc);
2943 lambda_vec_copy(foreign_lambda, native_lambda);
2944 type = blk->sub[0].ival[0];
2947 for (i = 0; i < native_lambda->lc->N; i++)
2949 foreign_lambda->val[i] = blk->sub[1].dval[i];
2954 if (blk->sub[0].nr > 1)
2956 foreign_lambda->dhdl = blk->sub[0].ival[1];
2960 foreign_lambda->dhdl = 0;
2966 /* initialize the samples structure if it's empty. */
2968 samples_init(*smp, native_lambda, foreign_lambda, temp,
2969 type == dhbtDHDL, filename);
2970 (*smp)->start_time = start_time;
2971 (*smp)->delta_time = delta_time;
2974 /* set convenience pointer */
2977 /* now double check */
2978 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2980 char buf[STRLEN], buf2[STRLEN];
2981 lambda_vec_print(foreign_lambda, buf, FALSE);
2982 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2983 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2984 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2985 filename, start_time);
2988 /* make room for the data */
2989 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2991 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2992 blk->sub[2].nr*2 : s->ndu_alloc;
2993 srenew(s->du_alloc, s->ndu_alloc);
2994 s->du = s->du_alloc;
2997 s->ndu += blk->sub[2].nr;
2998 s->ntot += blk->sub[2].nr;
2999 *ndu = blk->sub[2].nr;
3001 /* and copy the data*/
3002 for (j = 0; j < blk->sub[2].nr; j++)
3004 if (blk->sub[2].type == xdr_datatype_float)
3006 s->du[startj+j] = blk->sub[2].fval[j];
3010 s->du[startj+j] = blk->sub[2].dval[j];
3013 if (start_time + blk->sub[2].nr*delta_time > *last_t)
3015 *last_t = start_time + blk->sub[2].nr*delta_time;
3019 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3020 double start_time, double delta_time,
3021 lambda_vec_t *native_lambda, double temp,
3022 double *last_t, const char *filename)
3027 double old_foreign_lambda;
3028 lambda_vec_t *foreign_lambda;
3032 /* check the block types etc. */
3033 if ( (blk->nsub < 2) ||
3034 (blk->sub[0].type != xdr_datatype_double) ||
3035 (blk->sub[1].type != xdr_datatype_int64) ||
3036 (blk->sub[0].nr < 2) ||
3037 (blk->sub[1].nr < 2) )
3040 "Unexpected/corrupted block data in file %s around time %f",
3041 filename, start_time);
3044 nhist = blk->nsub-2;
3052 "Unexpected/corrupted block data in file %s around time %f",
3053 filename, start_time);
3059 snew(foreign_lambda, 1);
3060 lambda_vec_init(foreign_lambda, native_lambda->lc);
3061 lambda_vec_copy(foreign_lambda, native_lambda);
3062 type = (int)(blk->sub[1].lval[1]);
3065 double old_foreign_lambda;
3067 old_foreign_lambda = blk->sub[0].dval[0];
3068 if (old_foreign_lambda >= 0)
3070 foreign_lambda->val[0] = old_foreign_lambda;
3071 if (foreign_lambda->lc->N > 1)
3074 "Single-component lambda in multi-component file %s",
3080 for (i = 0; i < native_lambda->lc->N; i++)
3082 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3088 if (foreign_lambda->lc->N > 1)
3090 if (blk->sub[1].nr < 3 + nhist)
3093 "Missing derivative coord in multi-component file %s",
3096 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3100 foreign_lambda->dhdl = 0;
3104 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3108 for (i = 0; i < nhist; i++)
3110 nbins[i] = blk->sub[i+2].nr;
3113 hist_init(s->hist, nhist, nbins);
3115 for (i = 0; i < nhist; i++)
3117 s->hist->x0[i] = blk->sub[1].lval[2+i];
3118 s->hist->dx[i] = blk->sub[0].dval[1];
3121 s->hist->dx[i] = -s->hist->dx[i];
3125 s->hist->start_time = start_time;
3126 s->hist->delta_time = delta_time;
3127 s->start_time = start_time;
3128 s->delta_time = delta_time;
3130 for (i = 0; i < nhist; i++)
3133 gmx_int64_t sum = 0;
3135 for (j = 0; j < s->hist->nbin[i]; j++)
3137 int binv = (int)(blk->sub[i+2].ival[j]);
3139 s->hist->bin[i][j] = binv;
3152 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3158 if (start_time + s->hist->sum*delta_time > *last_t)
3160 *last_t = start_time + s->hist->sum*delta_time;
3166 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3172 gmx_enxnm_t *enm = NULL;
3173 double first_t = -1;
3175 samples_t **samples_rawdh = NULL; /* contains samples for raw delta_h */
3176 int *nhists = NULL; /* array to keep count & print at end */
3177 int *npts = NULL; /* array to keep count & print at end */
3178 lambda_vec_t **lambdas = NULL; /* array to keep count & print at end */
3179 lambda_vec_t *native_lambda;
3180 double end_time; /* the end time of the last batch of samples */
3182 lambda_vec_t start_lambda;
3184 fp = open_enx(fn, "r");
3185 do_enxnms(fp, &nre, &enm);
3188 snew(native_lambda, 1);
3189 start_lambda.lc = NULL;
3191 while (do_enx(fp, fr))
3193 /* count the data blocks */
3194 int nblocks_raw = 0;
3195 int nblocks_hist = 0;
3198 /* DHCOLL block information: */
3199 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3202 /* count the blocks and handle collection information: */
3203 for (i = 0; i < fr->nblock; i++)
3205 if (fr->block[i].id == enxDHHIST)
3209 if (fr->block[i].id == enxDH)
3213 if (fr->block[i].id == enxDHCOLL)
3216 if ( (fr->block[i].nsub < 1) ||
3217 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3218 (fr->block[i].sub[0].nr < 5))
3220 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3223 /* read the data from the DHCOLL block */
3224 rtemp = fr->block[i].sub[0].dval[0];
3225 start_time = fr->block[i].sub[0].dval[1];
3226 delta_time = fr->block[i].sub[0].dval[2];
3227 old_start_lambda = fr->block[i].sub[0].dval[3];
3228 delta_lambda = fr->block[i].sub[0].dval[4];
3230 if (delta_lambda > 0)
3232 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3234 if ( ( *temp != rtemp) && (*temp > 0) )
3236 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3240 if (old_start_lambda >= 0)
3244 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3247 "lambda vector components in %s don't match those previously read",
3253 lambda_components_add(&(sd->lc), "", 0);
3255 if (!start_lambda.lc)
3257 lambda_vec_init(&start_lambda, &(sd->lc));
3259 start_lambda.val[0] = old_start_lambda;
3263 /* read lambda vector */
3265 gmx_bool check = (sd->lc.N > 0);
3266 if (fr->block[i].nsub < 2)
3269 "No lambda vector, but start_lambda=%f\n",
3272 n_lambda_vec = fr->block[i].sub[1].ival[1];
3273 for (j = 0; j < n_lambda_vec; j++)
3276 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3279 /* check the components */
3280 lambda_components_check(&(sd->lc), j, name,
3285 lambda_components_add(&(sd->lc), name,
3289 lambda_vec_init(&start_lambda, &(sd->lc));
3290 start_lambda.index = fr->block[i].sub[1].ival[0];
3291 for (j = 0; j < n_lambda_vec; j++)
3293 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3298 first_t = start_time;
3305 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3307 if (nblocks_raw > 0 && nblocks_hist > 0)
3309 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3314 /* check the native lambda */
3315 if (!lambda_vec_same(&start_lambda, native_lambda) )
3317 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3318 fn, native_lambda, start_lambda, start_time);
3320 /* check the number of samples against the previous number */
3321 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3323 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3324 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3326 /* check whether last iterations's end time matches with
3327 the currrent start time */
3328 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3330 /* it didn't. We need to store our samples and reallocate */
3331 for (i = 0; i < nsamples; i++)
3333 if (samples_rawdh[i])
3335 /* insert it into the existing list */
3336 lambda_data_list_insert_sample(sd->lb,
3338 /* and make sure we'll allocate a new one this time
3340 samples_rawdh[i] = NULL;
3347 /* this is the first round; allocate the associated data
3349 /*native_lambda=start_lambda;*/
3350 lambda_vec_init(native_lambda, &(sd->lc));
3351 lambda_vec_copy(native_lambda, &start_lambda);
3352 nsamples = nblocks_raw+nblocks_hist;
3353 snew(nhists, nsamples);
3354 snew(npts, nsamples);
3355 snew(lambdas, nsamples);
3356 snew(samples_rawdh, nsamples);
3357 for (i = 0; i < nsamples; i++)
3362 samples_rawdh[i] = NULL; /* init to NULL so we know which
3363 ones contain values */
3368 k = 0; /* counter for the lambdas, etc. arrays */
3369 for (i = 0; i < fr->nblock; i++)
3371 if (fr->block[i].id == enxDH)
3373 int type = (fr->block[i].sub[0].ival[0]);
3374 if (type == dhbtDH || type == dhbtDHDL)
3377 read_edr_rawdh_block(&(samples_rawdh[k]),
3380 start_time, delta_time,
3381 native_lambda, rtemp,
3384 if (samples_rawdh[k])
3386 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3391 else if (fr->block[i].id == enxDHHIST)
3393 int type = (int)(fr->block[i].sub[1].lval[1]);
3394 if (type == dhbtDH || type == dhbtDHDL)
3398 samples_t *s; /* this is where the data will go */
3399 s = read_edr_hist_block(&nb, &(fr->block[i]),
3400 start_time, delta_time,
3401 native_lambda, rtemp,
3406 lambdas[k] = s->foreign_lambda;
3409 /* and insert the new sample immediately */
3410 for (j = 0; j < nb; j++)
3412 lambda_data_list_insert_sample(sd->lb, s+j);
3418 /* Now store all our extant sample collections */
3419 for (i = 0; i < nsamples; i++)
3421 if (samples_rawdh[i])
3423 /* insert it into the existing list */
3424 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3432 lambda_vec_print(native_lambda, buf, FALSE);
3433 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3434 fn, first_t, last_t, buf);
3435 for (i = 0; i < nsamples; i++)
3439 lambda_vec_print(lambdas[i], buf, TRUE);
3442 printf(" %s (%d hists)\n", buf, nhists[i]);
3446 printf(" %s (%d pts)\n", buf, npts[i]);
3458 int gmx_bar(int argc, char *argv[])
3460 static const char *desc[] = {
3461 "[THISMODULE] calculates free energy difference estimates through ",
3462 "Bennett's acceptance ratio method (BAR). It also automatically",
3463 "adds series of individual free energies obtained with BAR into",
3464 "a combined free energy estimate.[PAR]",
3466 "Every individual BAR free energy difference relies on two ",
3467 "simulations at different states: say state A and state B, as",
3468 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3469 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3470 "average of the Hamiltonian difference of state B given state A and",
3472 "The energy differences to the other state must be calculated",
3473 "explicitly during the simulation. This can be done with",
3474 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3476 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3477 "Two types of input files are supported:[BR]",
3478 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
3479 "The files should have columns ",
3480 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3481 "The [GRK]lambda[grk] values are inferred ",
3482 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3483 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3484 "legends of Delta H",
3486 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
3487 "[TT]-extp[tt] option for these files, it is assumed",
3488 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3489 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3490 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3491 "subtitle (if present), otherwise from a number in the subdirectory ",
3492 "in the file name.[PAR]",
3494 "The [GRK]lambda[grk] of the simulation is parsed from ",
3495 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3496 "foreign [GRK]lambda[grk] values from the legend containing the ",
3497 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3498 "the legend line containing 'T ='.[PAR]",
3500 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3501 "These can contain either lists of energy differences (see the ",
3502 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3503 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3504 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3505 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3507 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3508 "the energy difference can also be extrapolated from the ",
3509 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3510 "option, which assumes that the system's Hamiltonian depends linearly",
3511 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3513 "The free energy estimates are determined using BAR with bisection, ",
3514 "with the precision of the output set with [TT]-prec[tt]. ",
3515 "An error estimate taking into account time correlations ",
3516 "is made by splitting the data into blocks and determining ",
3517 "the free energy differences over those blocks and assuming ",
3518 "the blocks are independent. ",
3519 "The final error estimate is determined from the average variance ",
3520 "over 5 blocks. A range of block numbers for error estimation can ",
3521 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3523 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3524 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3525 "samples. [BB]Note[bb] that when aggregating energy ",
3526 "differences/derivatives with different sampling intervals, this is ",
3527 "almost certainly not correct. Usually subsequent energies are ",
3528 "correlated and different time intervals mean different degrees ",
3529 "of correlation between samples.[PAR]",
3531 "The results are split in two parts: the last part contains the final ",
3532 "results in kJ/mol, together with the error estimate for each part ",
3533 "and the total. The first part contains detailed free energy ",
3534 "difference estimates and phase space overlap measures in units of ",
3535 "kT (together with their computed error estimate). The printed ",
3537 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3538 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3539 "[TT]*[tt] DG: the free energy estimate.[BR]",
3540 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
3541 "[TT]*[tt] s_B: an estimate of the relative entropy of A in B.[BR]",
3542 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
3544 "The relative entropy of both states in each other's ensemble can be ",
3545 "interpreted as a measure of phase space overlap: ",
3546 "the relative entropy s_A of the work samples of lambda_B in the ",
3547 "ensemble of lambda_A (and vice versa for s_B), is a ",
3548 "measure of the 'distance' between Boltzmann distributions of ",
3549 "the two states, that goes to zero for identical distributions. See ",
3550 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3552 "The estimate of the expected per-sample standard deviation, as given ",
3553 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3554 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3555 "of the actual statistical error, because it assumes independent samples).[PAR]",
3557 "To get a visual estimate of the phase space overlap, use the ",
3558 "[TT]-oh[tt] option to write series of histograms, together with the ",
3559 "[TT]-nbin[tt] option.[PAR]"
3561 static real begin = 0, end = -1, temp = -1;
3562 int nd = 2, nbmin = 5, nbmax = 5;
3564 gmx_bool use_dhdl = FALSE;
3565 gmx_bool calc_s, calc_v;
3567 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3568 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3569 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3570 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3571 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3572 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3573 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3574 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3578 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3579 { efEDR, "-g", "ener", ffOPTRDMULT },
3580 { efXVG, "-o", "bar", ffOPTWR },
3581 { efXVG, "-oi", "barint", ffOPTWR },
3582 { efXVG, "-oh", "histogram", ffOPTWR }
3584 #define NFILE asize(fnm)
3587 int nf = 0; /* file counter */
3589 int nfile_tot; /* total number of input files */
3594 sim_data_t sim_data; /* the simulation data */
3595 barres_t *results; /* the results */
3596 int nresults; /* number of results in results array */
3599 double prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3601 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3602 char buf[STRLEN], buf2[STRLEN];
3603 char ktformat[STRLEN], sktformat[STRLEN];
3604 char kteformat[STRLEN], skteformat[STRLEN];
3607 gmx_bool result_OK = TRUE, bEE = TRUE;
3609 gmx_bool disc_err = FALSE;
3610 double sum_disc_err = 0.; /* discretization error */
3611 gmx_bool histrange_err = FALSE;
3612 double sum_histrange_err = 0.; /* histogram range error */
3613 double stat_err = 0.; /* statistical error */
3615 if (!parse_common_args(&argc, argv,
3617 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3622 if (opt2bSet("-f", NFILE, fnm))
3624 nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3626 if (opt2bSet("-g", NFILE, fnm))
3628 nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3631 sim_data_init(&sim_data);
3633 /* make linked list */
3635 lambda_data_init(lb, 0, 0);
3641 nfile_tot = nxvgfile + nedrfile;
3645 gmx_fatal(FARGS, "No input files!");
3650 gmx_fatal(FARGS, "Can not have negative number of digits");
3652 prec = pow(10, -nd);
3654 snew(partsum, (nbmax+1)*(nbmax+1));
3657 /* read in all files. First xvg files */
3658 for (f = 0; f < nxvgfile; f++)
3660 read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3663 /* then .edr files */
3664 for (f = 0; f < nedrfile; f++)
3666 read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3670 /* fix the times to allow for equilibration */
3671 sim_data_impose_times(&sim_data, begin, end);
3673 if (opt2bSet("-oh", NFILE, fnm))
3675 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3678 /* assemble the output structures from the lambdas */
3679 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3681 sum_disc_err = barres_list_max_disc_err(results, nresults);
3685 printf("\nNo results to calculate.\n");
3689 if (sum_disc_err > prec)
3691 prec = sum_disc_err;
3692 nd = ceil(-log10(prec));
3693 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3697 /*sprintf(lamformat,"%%6.3f");*/
3698 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3699 /* the format strings of the results in kT */
3700 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3701 sprintf( sktformat, "%%%ds", 6+nd);
3702 /* the format strings of the errors in kT */
3703 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3704 sprintf( skteformat, "%%%ds", 4+nd);
3705 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3706 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3711 if (opt2bSet("-o", NFILE, fnm))
3713 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3714 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3715 "\\lambda", buf, exvggtXYDY, oenv);
3719 if (opt2bSet("-oi", NFILE, fnm))
3721 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3722 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3723 "\\lambda", buf, oenv);
3733 /* first calculate results */
3736 for (f = 0; f < nresults; f++)
3738 /* Determine the free energy difference with a factor of 10
3739 * more accuracy than requested for printing.
3741 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3744 if (results[f].dg_disc_err > prec/10.)
3748 if (results[f].dg_histrange_err > prec/10.)
3750 histrange_err = TRUE;
3754 /* print results in kT */
3758 printf("\nTemperature: %g K\n", temp);
3760 printf("\nDetailed results in kT (see help for explanation):\n\n");
3761 printf("%6s ", " lam_A");
3762 printf("%6s ", " lam_B");
3763 printf(sktformat, "DG ");
3766 printf(skteformat, "+/- ");
3770 printf(skteformat, "disc ");
3774 printf(skteformat, "range ");
3776 printf(sktformat, "s_A ");
3779 printf(skteformat, "+/- " );
3781 printf(sktformat, "s_B ");
3784 printf(skteformat, "+/- " );
3786 printf(sktformat, "stdev ");
3789 printf(skteformat, "+/- ");
3792 for (f = 0; f < nresults; f++)
3794 lambda_vec_print_short(results[f].a->native_lambda, buf);
3796 lambda_vec_print_short(results[f].b->native_lambda, buf);
3798 printf(ktformat, results[f].dg);
3802 printf(kteformat, results[f].dg_err);
3807 printf(kteformat, results[f].dg_disc_err);
3812 printf(kteformat, results[f].dg_histrange_err);
3815 printf(ktformat, results[f].sa);
3819 printf(kteformat, results[f].sa_err);
3822 printf(ktformat, results[f].sb);
3826 printf(kteformat, results[f].sb_err);
3829 printf(ktformat, results[f].dg_stddev);
3833 printf(kteformat, results[f].dg_stddev_err);
3837 /* Check for negative relative entropy with a 95% certainty. */
3838 if (results[f].sa < -2*results[f].sa_err ||
3839 results[f].sb < -2*results[f].sb_err)
3847 printf("\nWARNING: Some of these results violate the Second Law of "
3848 "Thermodynamics: \n"
3849 " This is can be the result of severe undersampling, or "
3851 " there is something wrong with the simulations.\n");
3855 /* final results in kJ/mol */
3856 printf("\n\nFinal results in kJ/mol:\n\n");
3858 for (f = 0; f < nresults; f++)
3863 lambda_vec_print_short(results[f].a->native_lambda, buf);
3864 fprintf(fpi, xvg2format, buf, dg_tot);
3870 lambda_vec_print_intermediate(results[f].a->native_lambda,
3871 results[f].b->native_lambda,
3874 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3878 lambda_vec_print_short(results[f].a->native_lambda, buf);
3879 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3880 printf("%s - %s", buf, buf2);
3883 printf(dgformat, results[f].dg*kT);
3887 printf(dgformat, results[f].dg_err*kT);
3891 printf(" (max. range err. = ");
3892 printf(dgformat, results[f].dg_histrange_err*kT);
3894 sum_histrange_err += results[f].dg_histrange_err*kT;
3898 dg_tot += results[f].dg;
3902 lambda_vec_print_short(results[0].a->native_lambda, buf);
3903 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3904 printf("%s - %s", buf, buf2);
3907 printf(dgformat, dg_tot*kT);
3910 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3912 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3917 printf("\nmaximum discretization error = ");
3918 printf(dgformat, sum_disc_err);
3919 if (bEE && stat_err < sum_disc_err)
3921 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3926 printf("\nmaximum histogram range error = ");
3927 printf(dgformat, sum_histrange_err);
3928 if (bEE && stat_err < sum_histrange_err)
3930 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3939 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3940 fprintf(fpi, xvg2format, buf, dg_tot);
3944 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3945 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");