2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/legacyheaders/typedefs.h"
44 #include "gromacs/utility/smalloc.h"
45 #include "gromacs/utility/futil.h"
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/legacyheaders/macros.h"
48 #include "gromacs/fileio/enxio.h"
49 #include "gromacs/math/units.h"
50 #include "gromacs/utility/fatalerror.h"
51 #include "gromacs/fileio/xvgr.h"
52 #include "gromacs/legacyheaders/viewit.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/legacyheaders/names.h"
57 #include "gromacs/legacyheaders/mdebin.h"
60 /* Structure for the names of lambda vector components */
61 typedef struct lambda_components_t
63 char **names; /* Array of strings with names for the lambda vector
65 int N; /* The number of components */
66 int Nalloc; /* The number of allocated components */
67 } lambda_components_t;
69 /* Structure for a lambda vector or a dhdl derivative direction */
70 typedef struct lambda_vec_t
72 double *val; /* The lambda vector component values. Only valid if
74 int dhdl; /* The coordinate index for the derivative described by this
76 const lambda_components_t *lc; /* the associated lambda_components
78 int index; /* The state number (init-lambda-state) of this lambda
79 vector, if known. If not, it is set to -1 */
82 /* the dhdl.xvg data from a simulation */
86 int ftp; /* file type */
87 int nset; /* number of lambdas, including dhdl */
88 int *np; /* number of data points (du or hists) per lambda */
89 int np_alloc; /* number of points (du or hists) allocated */
90 double temp; /* temperature */
91 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
92 double *t; /* the times (of second index for y) */
93 double **y; /* the dU values. y[0] holds the derivative, while
94 further ones contain the energy differences between
95 the native lambda and the 'foreign' lambdas. */
96 lambda_vec_t native_lambda; /* the native lambda */
98 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
102 typedef struct hist_t
104 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
105 double dx[2]; /* the histogram spacing. The reverse
106 dx is the negative of the forward dx.*/
107 gmx_int64_t x0[2]; /* the (forward + reverse) histogram start
110 int nbin[2]; /* the (forward+reverse) number of bins */
111 gmx_int64_t sum; /* the total number of counts. Must be
112 the same for forward + reverse. */
113 int nhist; /* number of hist datas (forward or reverse) */
115 double start_time, delta_time; /* start time, end time of histogram */
119 /* an aggregate of samples for partial free energy calculation */
120 typedef struct samples_t
122 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
123 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
124 double temp; /* the temperature */
125 gmx_bool derivative; /* whether this sample is a derivative */
127 /* The samples come either as either delta U lists: */
128 int ndu; /* the number of delta U samples */
129 double *du; /* the delta u's */
130 double *t; /* the times associated with those samples, or: */
131 double start_time, delta_time; /*start time and delta time for linear time*/
133 /* or as histograms: */
134 hist_t *hist; /* a histogram */
136 /* allocation data: (not NULL for data 'owned' by this struct) */
137 double *du_alloc, *t_alloc; /* allocated delta u arrays */
138 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
139 hist_t *hist_alloc; /* allocated hist */
141 gmx_int64_t ntot; /* total number of samples */
142 const char *filename; /* the file name this sample comes from */
145 /* a sample range (start to end for du-style data, or boolean
146 for both du-style data and histograms */
147 typedef struct sample_range_t
149 int start, end; /* start and end index for du style data */
150 gmx_bool use; /* whether to use this sample */
152 samples_t *s; /* the samples this range belongs to */
156 /* a collection of samples for a partial free energy calculation
157 (i.e. the collection of samples from one native lambda to one
159 typedef struct sample_coll_t
161 lambda_vec_t *native_lambda; /* these should be the same for all samples
163 lambda_vec_t *foreign_lambda; /* collection */
164 double temp; /* the temperature */
166 int nsamples; /* the number of samples */
167 samples_t **s; /* the samples themselves */
168 sample_range_t *r; /* the sample ranges */
169 int nsamples_alloc; /* number of allocated samples */
171 gmx_int64_t ntot; /* total number of samples in the ranges of
174 struct sample_coll_t *next, *prev; /* next and previous in the list */
177 /* all the samples associated with a lambda point */
178 typedef struct lambda_data_t
180 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
181 double temp; /* temperature */
183 sample_coll_t *sc; /* the samples */
185 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
187 struct lambda_data_t *next, *prev; /* the next and prev in the list */
190 /* Top-level data structure of simulation data */
191 typedef struct sim_data_t
193 lambda_data_t *lb; /* a lambda data linked list */
194 lambda_data_t lb_head; /* The head element of the linked list */
196 lambda_components_t lc; /* the allowed components of the lambda
200 /* Top-level data structure with calculated values. */
202 sample_coll_t *a, *b; /* the simulation data */
204 double dg; /* the free energy difference */
205 double dg_err; /* the free energy difference */
207 double dg_disc_err; /* discretization error */
208 double dg_histrange_err; /* histogram range error */
210 double sa; /* relative entropy of b in state a */
211 double sa_err; /* error in sa */
212 double sb; /* relative entropy of a in state b */
213 double sb_err; /* error in sb */
215 double dg_stddev; /* expected dg stddev per sample */
216 double dg_stddev_err; /* error in dg_stddev */
220 /* Initialize a lambda_components structure */
221 static void lambda_components_init(lambda_components_t *lc)
225 snew(lc->names, lc->Nalloc);
228 /* Add a component to a lambda_components structure */
229 static void lambda_components_add(lambda_components_t *lc,
230 const char *name, size_t name_length)
232 while (lc->N + 1 > lc->Nalloc)
234 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
235 srenew(lc->names, lc->Nalloc);
237 snew(lc->names[lc->N], name_length+1);
238 strncpy(lc->names[lc->N], name, name_length);
242 /* check whether a component with index 'index' matches the given name, or
243 is also NULL. Returns TRUE if this is the case.
244 the string name does not need to end */
245 static gmx_bool lambda_components_check(const lambda_components_t *lc,
255 if (name == NULL && lc->names[index] == NULL)
259 if ((name == NULL) != (lc->names[index] == NULL))
263 len = strlen(lc->names[index]);
264 if (len != name_length)
268 if (strncmp(lc->names[index], name, name_length) == 0)
275 /* Find the index of a given lambda component name, or -1 if not found */
276 static int lambda_components_find(const lambda_components_t *lc,
282 for (i = 0; i < lc->N; i++)
284 if (strncmp(lc->names[i], name, name_length) == 0)
294 /* initialize a lambda vector */
295 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
297 snew(lv->val, lc->N);
303 static void lambda_vec_destroy(lambda_vec_t *lv)
308 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
312 lambda_vec_init(lv, orig->lc);
313 lv->dhdl = orig->dhdl;
314 lv->index = orig->index;
315 for (i = 0; i < lv->lc->N; i++)
317 lv->val[i] = orig->val[i];
321 /* write a lambda vec to a preallocated string */
322 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
327 str[0] = 0; /* reset the string */
332 str += sprintf(str, "delta H to ");
336 str += sprintf(str, "(");
338 for (i = 0; i < lv->lc->N; i++)
340 str += sprintf(str, "%g", lv->val[i]);
343 str += sprintf(str, ", ");
348 str += sprintf(str, ")");
353 /* this lambda vector describes a derivative */
354 str += sprintf(str, "dH/dl");
355 if (strlen(lv->lc->names[lv->dhdl]) > 0)
357 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
362 /* write a shortened version of the lambda vec to a preallocated string */
363 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
370 sprintf(str, "%6d", lv->index);
376 sprintf(str, "%6.3f", lv->val[0]);
380 sprintf(str, "dH/dl[%d]", lv->dhdl);
385 /* write an intermediate version of two lambda vecs to a preallocated string */
386 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
387 const lambda_vec_t *b, char *str)
393 if ( (a->index >= 0) && (b->index >= 0) )
395 sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
399 if ( (a->dhdl < 0) && (b->dhdl < 0) )
401 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
408 /* calculate the difference in lambda vectors: c = a-b.
409 c must be initialized already, and a and b must describe non-derivative
411 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
416 if ( (a->dhdl > 0) || (b->dhdl > 0) )
419 "Trying to calculate the difference between derivatives instead of lambda points");
421 if ((a->lc != b->lc) || (a->lc != c->lc) )
424 "Trying to calculate the difference lambdas with differing basis set");
426 for (i = 0; i < a->lc->N; i++)
428 c->val[i] = a->val[i] - b->val[i];
432 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
433 a and b must describe non-derivative lambda points */
434 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
439 if ( (a->dhdl > 0) || (b->dhdl > 0) )
442 "Trying to calculate the difference between derivatives instead of lambda points");
447 "Trying to calculate the difference lambdas with differing basis set");
449 for (i = 0; i < a->lc->N; i++)
451 double df = a->val[i] - b->val[i];
458 /* check whether two lambda vectors are the same */
459 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
469 for (i = 0; i < a->lc->N; i++)
471 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
480 /* they're derivatives, so we check whether the indices match */
481 return (a->dhdl == b->dhdl);
485 /* Compare the sort order of two foreign lambda vectors
487 returns 1 if a is 'bigger' than b,
488 returns 0 if they're the same,
489 returns -1 if a is 'smaller' than b.*/
490 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
491 const lambda_vec_t *b)
494 double norm_a = 0, norm_b = 0;
495 gmx_bool different = FALSE;
499 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
501 /* if either one has an index we sort based on that */
502 if ((a->index >= 0) || (b->index >= 0))
504 if (a->index == b->index)
508 return (a->index > b->index) ? 1 : -1;
510 if (a->dhdl >= 0 || b->dhdl >= 0)
512 /* lambda vectors that are derivatives always sort higher than those
513 without derivatives */
514 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
516 return (a->dhdl >= 0) ? 1 : -1;
518 return a->dhdl > b->dhdl;
521 /* neither has an index, so we can only sort on the lambda components,
522 which is only valid if there is one component */
523 for (i = 0; i < a->lc->N; i++)
525 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
529 norm_a += a->val[i]*a->val[i];
530 norm_b += b->val[i]*b->val[i];
536 return norm_a > norm_b;
539 /* Compare the sort order of two native lambda vectors
541 returns 1 if a is 'bigger' than b,
542 returns 0 if they're the same,
543 returns -1 if a is 'smaller' than b.*/
544 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
545 const lambda_vec_t *b)
551 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
553 /* if either one has an index we sort based on that */
554 if ((a->index >= 0) || (b->index >= 0))
556 if (a->index == b->index)
560 return (a->index > b->index) ? 1 : -1;
562 /* neither has an index, so we can only sort on the lambda components,
563 which is only valid if there is one component */
567 "Can't compare lambdas with no index and > 1 component");
569 if (a->dhdl >= 0 || b->dhdl >= 0)
572 "Can't compare native lambdas that are derivatives");
574 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
578 return a->val[0] > b->val[0] ? 1 : -1;
584 static void hist_init(hist_t *h, int nhist, int *nbin)
589 gmx_fatal(FARGS, "histogram with more than two sets of data!");
591 for (i = 0; i < nhist; i++)
593 snew(h->bin[i], nbin[i]);
595 h->nbin[i] = nbin[i];
596 h->start_time = h->delta_time = 0;
603 static void hist_destroy(hist_t *h)
609 static void xvg_init(xvg_t *ba)
618 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
619 lambda_vec_t *foreign_lambda, double temp,
620 gmx_bool derivative, const char *filename)
622 s->native_lambda = native_lambda;
623 s->foreign_lambda = foreign_lambda;
625 s->derivative = derivative;
630 s->start_time = s->delta_time = 0;
634 s->hist_alloc = NULL;
639 s->filename = filename;
642 static void sample_range_init(sample_range_t *r, samples_t *s)
650 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
651 lambda_vec_t *foreign_lambda, double temp)
653 sc->native_lambda = native_lambda;
654 sc->foreign_lambda = foreign_lambda;
660 sc->nsamples_alloc = 0;
663 sc->next = sc->prev = NULL;
666 static void sample_coll_destroy(sample_coll_t *sc)
668 /* don't free the samples themselves */
674 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
677 l->lambda = native_lambda;
683 l->sc = &(l->sc_head);
685 sample_coll_init(l->sc, native_lambda, NULL, 0.);
690 static void barres_init(barres_t *br)
699 br->dg_stddev_err = 0;
706 /* calculate the total number of samples in a sample collection */
707 static void sample_coll_calc_ntot(sample_coll_t *sc)
712 for (i = 0; i < sc->nsamples; i++)
718 sc->ntot += sc->s[i]->ntot;
722 sc->ntot += sc->r[i].end - sc->r[i].start;
729 /* find the barsamples_t associated with a lambda that corresponds to
730 a specific foreign lambda */
731 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
732 lambda_vec_t *foreign_lambda)
734 sample_coll_t *sc = l->sc->next;
738 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
748 /* insert li into an ordered list of lambda_colls */
749 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
751 sample_coll_t *scn = l->sc->next;
752 while ( (scn != l->sc) )
754 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
760 /* now insert it before the found scn */
762 sc->prev = scn->prev;
763 scn->prev->next = sc;
767 /* insert li into an ordered list of lambdas */
768 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
770 lambda_data_t *lc = head->next;
773 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
779 /* now insert ourselves before the found lc */
786 /* insert a sample and a sample_range into a sample_coll. The
787 samples are stored as a pointer, the range is copied. */
788 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
791 /* first check if it belongs here */
792 if (sc->temp != s->temp)
794 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
795 s->filename, sc->next->s[0]->filename);
797 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
799 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
800 s->filename, sc->next->s[0]->filename);
802 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
804 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
805 s->filename, sc->next->s[0]->filename);
808 /* check if there's room */
809 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
811 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
812 srenew(sc->s, sc->nsamples_alloc);
813 srenew(sc->r, sc->nsamples_alloc);
815 sc->s[sc->nsamples] = s;
816 sc->r[sc->nsamples] = *r;
819 sample_coll_calc_ntot(sc);
822 /* insert a sample into a lambda_list, creating the right sample_coll if
824 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
826 gmx_bool found = FALSE;
830 lambda_data_t *l = head->next;
832 /* first search for the right lambda_data_t */
835 if (lambda_vec_same(l->lambda, s->native_lambda) )
845 snew(l, 1); /* allocate a new one */
846 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
847 lambda_data_insert_lambda(head, l); /* add it to the list */
850 /* now look for a sample collection */
851 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
854 snew(sc, 1); /* allocate a new one */
855 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
856 lambda_data_insert_sample_coll(l, sc);
859 /* now insert the samples into the sample coll */
860 sample_range_init(&r, s);
861 sample_coll_insert_sample(sc, s, &r);
865 /* make a histogram out of a sample collection */
866 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
867 int *nbin_alloc, int *nbin,
868 double *dx, double *xmin, int nbin_default)
871 gmx_bool dx_set = FALSE;
872 gmx_bool xmin_set = FALSE;
874 gmx_bool xmax_set = FALSE;
875 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
876 limits of a histogram */
879 /* first determine dx and xmin; try the histograms */
880 for (i = 0; i < sc->nsamples; i++)
884 hist_t *hist = sc->s[i]->hist;
885 for (k = 0; k < hist->nhist; k++)
887 double hdx = hist->dx[k];
888 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
890 /* we use the biggest dx*/
891 if ( (!dx_set) || hist->dx[0] > *dx)
896 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
899 *xmin = (hist->x0[k]*hdx);
902 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
906 if (hist->bin[k][hist->nbin[k]-1] != 0)
908 xmax_set_hard = TRUE;
911 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
913 xmax_set_hard = TRUE;
919 /* and the delta us */
920 for (i = 0; i < sc->nsamples; i++)
922 if (sc->s[i]->ndu > 0)
924 /* determine min and max */
925 int starti = sc->r[i].start;
926 int endi = sc->r[i].end;
927 double du_xmin = sc->s[i]->du[starti];
928 double du_xmax = sc->s[i]->du[starti];
929 for (j = starti+1; j < endi; j++)
931 if (sc->s[i]->du[j] < du_xmin)
933 du_xmin = sc->s[i]->du[j];
935 if (sc->s[i]->du[j] > du_xmax)
937 du_xmax = sc->s[i]->du[j];
941 /* and now change the limits */
942 if ( (!xmin_set) || (du_xmin < *xmin) )
947 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
955 if (!xmax_set || !xmin_set)
964 *nbin = nbin_default;
965 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
966 be 0, and we count from 0 */
970 *nbin = (xmax-(*xmin))/(*dx);
973 if (*nbin > *nbin_alloc)
976 srenew(*bin, *nbin_alloc);
979 /* reset the histogram */
980 for (i = 0; i < (*nbin); i++)
985 /* now add the actual data */
986 for (i = 0; i < sc->nsamples; i++)
990 hist_t *hist = sc->s[i]->hist;
991 for (k = 0; k < hist->nhist; k++)
993 double hdx = hist->dx[k];
994 double xmin_hist = hist->x0[k]*hdx;
995 for (j = 0; j < hist->nbin[k]; j++)
997 /* calculate the bin corresponding to the middle of the
999 double x = hdx*(j+0.5) + xmin_hist;
1000 int binnr = (int)((x-(*xmin))/(*dx));
1002 if (binnr >= *nbin || binnr < 0)
1007 (*bin)[binnr] += hist->bin[k][j];
1013 int starti = sc->r[i].start;
1014 int endi = sc->r[i].end;
1015 for (j = starti; j < endi; j++)
1017 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1018 if (binnr >= *nbin || binnr < 0)
1029 /* write a collection of histograms to a file */
1030 void sim_data_histogram(sim_data_t *sd, const char *filename,
1031 int nbin_default, const output_env_t oenv)
1033 char label_x[STRLEN];
1034 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1035 const char *title = "N(\\DeltaH)";
1036 const char *label_y = "Samples";
1040 char **setnames = NULL;
1041 gmx_bool first_set = FALSE;
1042 /* histogram data: */
1049 lambda_data_t *bl_head = sd->lb;
1051 printf("\nWriting histogram to %s\n", filename);
1052 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1054 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1056 /* first get all the set names */
1058 /* iterate over all lambdas */
1059 while (bl != bl_head)
1061 sample_coll_t *sc = bl->sc->next;
1063 /* iterate over all samples */
1064 while (sc != bl->sc)
1066 char buf[STRLEN], buf2[STRLEN];
1069 srenew(setnames, nsets);
1070 snew(setnames[nsets-1], STRLEN);
1071 if (sc->foreign_lambda->dhdl < 0)
1073 lambda_vec_print(sc->native_lambda, buf, FALSE);
1074 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1075 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1076 deltag, lambda, buf2, lambda, buf);
1080 lambda_vec_print(sc->native_lambda, buf, FALSE);
1081 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1089 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1092 /* now make the histograms */
1094 /* iterate over all lambdas */
1095 while (bl != bl_head)
1097 sample_coll_t *sc = bl->sc->next;
1099 /* iterate over all samples */
1100 while (sc != bl->sc)
1104 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1107 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1110 for (i = 0; i < nbin; i++)
1112 double xmin = i*dx + min;
1113 double xmax = (i+1)*dx + min;
1115 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1134 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1138 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1139 if (lambda->index >= 0)
1141 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1143 if (lambda->dhdl >= 0)
1145 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1150 for (i = 0; i < lambda->lc->N; i++)
1152 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1158 /* create a collection (array) of barres_t object given a ordered linked list
1159 of barlamda_t sample collections */
1160 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1167 gmx_bool dhdl = FALSE;
1168 gmx_bool first = TRUE;
1169 lambda_data_t *bl_head = sd->lb;
1171 /* first count the lambdas */
1173 while (bl != bl_head)
1178 snew(res, nlambda-1);
1180 /* next put the right samples in the res */
1182 bl = bl_head->next->next; /* we start with the second one. */
1183 while (bl != bl_head)
1185 sample_coll_t *sc, *scprev;
1186 barres_t *br = &(res[*nres]);
1187 /* there is always a previous one. we search for that as a foreign
1189 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1190 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1198 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1199 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1203 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1208 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1211 else if (!scprev && !sc)
1213 char descX[STRLEN], descY[STRLEN];
1214 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1215 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1217 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1220 /* normal delta H */
1223 char descX[STRLEN], descY[STRLEN];
1224 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1225 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1226 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1230 char descX[STRLEN], descY[STRLEN];
1231 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1232 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1233 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1245 /* estimate the maximum discretization error */
1246 static double barres_list_max_disc_err(barres_t *res, int nres)
1249 double disc_err = 0.;
1250 double delta_lambda;
1252 for (i = 0; i < nres; i++)
1254 barres_t *br = &(res[i]);
1256 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1257 br->a->native_lambda);
1259 for (j = 0; j < br->a->nsamples; j++)
1261 if (br->a->s[j]->hist)
1264 if (br->a->s[j]->derivative)
1266 Wfac = delta_lambda;
1269 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1272 for (j = 0; j < br->b->nsamples; j++)
1274 if (br->b->s[j]->hist)
1277 if (br->b->s[j]->derivative)
1279 Wfac = delta_lambda;
1281 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1289 /* impose start and end times on a sample collection, updating sample_ranges */
1290 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1294 for (i = 0; i < sc->nsamples; i++)
1296 samples_t *s = sc->s[i];
1297 sample_range_t *r = &(sc->r[i]);
1300 double end_time = s->hist->delta_time*s->hist->sum +
1301 s->hist->start_time;
1302 if (s->hist->start_time < begin_t || end_time > end_t)
1312 if (s->start_time < begin_t)
1314 r->start = (int)((begin_t - s->start_time)/s->delta_time);
1316 end_time = s->delta_time*s->ndu + s->start_time;
1317 if (end_time > end_t)
1319 r->end = (int)((end_t - s->start_time)/s->delta_time);
1325 for (j = 0; j < s->ndu; j++)
1327 if (s->t[j] < begin_t)
1332 if (s->t[j] >= end_t)
1339 if (r->start > r->end)
1345 sample_coll_calc_ntot(sc);
1348 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1350 double first_t, last_t;
1351 double begin_t, end_t;
1353 lambda_data_t *head = sd->lb;
1356 if (begin <= 0 && end < 0)
1361 /* first determine the global start and end times */
1367 sample_coll_t *sc = lc->sc->next;
1368 while (sc != lc->sc)
1370 for (j = 0; j < sc->nsamples; j++)
1372 double start_t, end_t;
1374 start_t = sc->s[j]->start_time;
1375 end_t = sc->s[j]->start_time;
1378 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1384 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1388 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1392 if (start_t < first_t || first_t < 0)
1406 /* calculate the actual times */
1424 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1426 if (begin_t > end_t)
1430 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1432 /* then impose them */
1436 sample_coll_t *sc = lc->sc->next;
1437 while (sc != lc->sc)
1439 sample_coll_impose_times(sc, begin_t, end_t);
1447 /* create subsample i out of ni from an existing sample_coll */
1448 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1449 sample_coll_t *sc_orig,
1453 int hist_start, hist_end;
1455 gmx_int64_t ntot_start;
1456 gmx_int64_t ntot_end;
1457 gmx_int64_t ntot_so_far;
1459 *sc = *sc_orig; /* just copy all fields */
1461 /* allocate proprietary memory */
1462 snew(sc->s, sc_orig->nsamples);
1463 snew(sc->r, sc_orig->nsamples);
1465 /* copy the samples */
1466 for (j = 0; j < sc_orig->nsamples; j++)
1468 sc->s[j] = sc_orig->s[j];
1469 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1472 /* now fix start and end fields */
1473 /* the casts avoid possible overflows */
1474 ntot_start = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1475 ntot_end = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1477 for (j = 0; j < sc->nsamples; j++)
1479 gmx_int64_t ntot_add;
1480 gmx_int64_t new_start, new_end;
1486 ntot_add = sc->s[j]->hist->sum;
1490 ntot_add = sc->r[j].end - sc->r[j].start;
1498 if (!sc->s[j]->hist)
1500 if (ntot_so_far < ntot_start)
1502 /* adjust starting point */
1503 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1507 new_start = sc->r[j].start;
1509 /* adjust end point */
1510 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1511 if (new_end > sc->r[j].end)
1513 new_end = sc->r[j].end;
1516 /* check if we're in range at all */
1517 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1522 /* and write the new range */
1523 sc->r[j].start = (int)new_start;
1524 sc->r[j].end = (int)new_end;
1531 double ntot_start_norm, ntot_end_norm;
1532 /* calculate the amount of overlap of the
1533 desired range (ntot_start -- ntot_end) onto
1534 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1536 /* first calculate normalized bounds
1537 (where 0 is the start of the hist range, and 1 the end) */
1538 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1539 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1541 /* now fix the boundaries */
1542 ntot_start_norm = min(1, max(0., ntot_start_norm));
1543 ntot_end_norm = max(0, min(1., ntot_end_norm));
1545 /* and calculate the overlap */
1546 overlap = ntot_end_norm - ntot_start_norm;
1548 if (overlap > 0.95) /* we allow for 5% slack */
1550 sc->r[j].use = TRUE;
1552 else if (overlap < 0.05)
1554 sc->r[j].use = FALSE;
1562 ntot_so_far += ntot_add;
1564 sample_coll_calc_ntot(sc);
1569 /* calculate minimum and maximum work values in sample collection */
1570 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1571 double *Wmin, double *Wmax)
1578 for (i = 0; i < sc->nsamples; i++)
1580 samples_t *s = sc->s[i];
1581 sample_range_t *r = &(sc->r[i]);
1586 for (j = r->start; j < r->end; j++)
1588 *Wmin = min(*Wmin, s->du[j]*Wfac);
1589 *Wmax = max(*Wmax, s->du[j]*Wfac);
1594 int hd = 0; /* determine the histogram direction: */
1596 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1600 dx = s->hist->dx[hd];
1602 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1604 *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1605 *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1606 /* look for the highest value bin with values */
1607 if (s->hist->bin[hd][j] > 0)
1609 *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1610 *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1619 /* Initialize a sim_data structure */
1620 static void sim_data_init(sim_data_t *sd)
1622 /* make linked list */
1623 sd->lb = &(sd->lb_head);
1624 sd->lb->next = sd->lb;
1625 sd->lb->prev = sd->lb;
1627 lambda_components_init(&(sd->lc));
1631 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1638 for (i = 0; i < n; i++)
1640 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1646 /* calculate the BAR average given a histogram
1648 if type== 0, calculate the best estimate for the average,
1649 if type==-1, calculate the minimum possible value given the histogram
1650 if type== 1, calculate the maximum possible value given the histogram */
1651 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1657 /* normalization factor multiplied with bin width and
1658 number of samples (we normalize through M): */
1660 int hd = 0; /* determine the histogram direction: */
1663 if ( (hist->nhist > 1) && (Wfac < 0) )
1668 max = hist->nbin[hd]-1;
1671 max = hist->nbin[hd]; /* we also add whatever was out of range */
1674 for (i = 0; i < max; i++)
1676 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1677 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1679 sum += pxdx/(1. + exp(x + sbMmDG));
1685 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1686 double temp, double tol, int type)
1691 double Wfac1, Wfac2, Wmin, Wmax;
1692 double DG0, DG1, DG2, dDG1;
1694 double n1, n2; /* numbers of samples as doubles */
1699 /* count the numbers of samples */
1705 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1706 if (ca->foreign_lambda->dhdl < 0)
1708 /* this is the case when the delta U were calculated directly
1709 (i.e. we're not scaling dhdl) */
1715 /* we're using dhdl, so delta_lambda needs to be a
1716 multiplication factor. */
1717 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1718 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1720 if (cb->native_lambda->lc->N > 1)
1723 "Can't (yet) do multi-component dhdl interpolation");
1726 Wfac1 = beta*delta_lambda;
1727 Wfac2 = -beta*delta_lambda;
1732 /* We print the output both in kT and kJ/mol.
1733 * Here we determine DG in kT, so when beta < 1
1734 * the precision has to be increased.
1739 /* Calculate minimum and maximum work to give an initial estimate of
1740 * delta G as their average.
1743 double Wmin1, Wmin2, Wmax1, Wmax2;
1744 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1745 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1747 Wmin = min(Wmin1, Wmin2);
1748 Wmax = max(Wmax1, Wmax2);
1756 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1758 /* We approximate by bisection: given our initial estimates
1759 we keep checking whether the halfway point is greater or
1760 smaller than what we get out of the BAR averages.
1762 For the comparison we can use twice the tolerance. */
1763 while (DG2 - DG0 > 2*tol)
1765 DG1 = 0.5*(DG0 + DG2);
1767 /* calculate the BAR averages */
1770 for (i = 0; i < ca->nsamples; i++)
1772 samples_t *s = ca->s[i];
1773 sample_range_t *r = &(ca->r[i]);
1778 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1782 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1787 for (i = 0; i < cb->nsamples; i++)
1789 samples_t *s = cb->s[i];
1790 sample_range_t *r = &(cb->r[i]);
1795 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1799 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1815 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1819 return 0.5*(DG0 + DG2);
1822 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1823 double temp, double dg, double *sa, double *sb)
1829 double Wfac1, Wfac2;
1835 /* count the numbers of samples */
1839 /* to ensure the work values are the same as during the delta_G */
1840 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1841 if (ca->foreign_lambda->dhdl < 0)
1843 /* this is the case when the delta U were calculated directly
1844 (i.e. we're not scaling dhdl) */
1850 /* we're using dhdl, so delta_lambda needs to be a
1851 multiplication factor. */
1852 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1854 Wfac1 = beta*delta_lambda;
1855 Wfac2 = -beta*delta_lambda;
1858 /* first calculate the average work in both directions */
1859 for (i = 0; i < ca->nsamples; i++)
1861 samples_t *s = ca->s[i];
1862 sample_range_t *r = &(ca->r[i]);
1867 for (j = r->start; j < r->end; j++)
1869 W_ab += Wfac1*s->du[j];
1874 /* normalization factor multiplied with bin width and
1875 number of samples (we normalize through M): */
1878 int hd = 0; /* histogram direction */
1879 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1883 dx = s->hist->dx[hd];
1885 for (j = 0; j < s->hist->nbin[0]; j++)
1887 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1888 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1896 for (i = 0; i < cb->nsamples; i++)
1898 samples_t *s = cb->s[i];
1899 sample_range_t *r = &(cb->r[i]);
1904 for (j = r->start; j < r->end; j++)
1906 W_ba += Wfac1*s->du[j];
1911 /* normalization factor multiplied with bin width and
1912 number of samples (we normalize through M): */
1915 int hd = 0; /* histogram direction */
1916 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1920 dx = s->hist->dx[hd];
1922 for (j = 0; j < s->hist->nbin[0]; j++)
1924 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1925 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1933 /* then calculate the relative entropies */
1938 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1939 double temp, double dg, double *stddev)
1943 double sigmafact = 0.;
1945 double Wfac1, Wfac2;
1951 /* count the numbers of samples */
1955 /* to ensure the work values are the same as during the delta_G */
1956 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1957 if (ca->foreign_lambda->dhdl < 0)
1959 /* this is the case when the delta U were calculated directly
1960 (i.e. we're not scaling dhdl) */
1966 /* we're using dhdl, so delta_lambda needs to be a
1967 multiplication factor. */
1968 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1970 Wfac1 = beta*delta_lambda;
1971 Wfac2 = -beta*delta_lambda;
1977 /* calculate average in both directions */
1978 for (i = 0; i < ca->nsamples; i++)
1980 samples_t *s = ca->s[i];
1981 sample_range_t *r = &(ca->r[i]);
1986 for (j = r->start; j < r->end; j++)
1988 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1993 /* normalization factor multiplied with bin width and
1994 number of samples (we normalize through M): */
1997 int hd = 0; /* histogram direction */
1998 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
2002 dx = s->hist->dx[hd];
2004 for (j = 0; j < s->hist->nbin[0]; j++)
2006 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2007 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2009 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
2014 for (i = 0; i < cb->nsamples; i++)
2016 samples_t *s = cb->s[i];
2017 sample_range_t *r = &(cb->r[i]);
2022 for (j = r->start; j < r->end; j++)
2024 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2029 /* normalization factor multiplied with bin width and
2030 number of samples (we normalize through M): */
2033 int hd = 0; /* histogram direction */
2034 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2038 dx = s->hist->dx[hd];
2040 for (j = 0; j < s->hist->nbin[0]; j++)
2042 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2043 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2045 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2051 sigmafact /= (n1 + n2);
2055 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2056 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2061 static void calc_bar(barres_t *br, double tol,
2062 int npee_min, int npee_max, gmx_bool *bEE,
2066 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2067 for calculated quantities */
2068 int nsample1, nsample2;
2069 double temp = br->a->temp;
2071 double dg_min, dg_max;
2072 gmx_bool have_hist = FALSE;
2074 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2076 br->dg_disc_err = 0.;
2077 br->dg_histrange_err = 0.;
2079 /* check if there are histograms */
2080 for (i = 0; i < br->a->nsamples; i++)
2082 if (br->a->r[i].use && br->a->s[i]->hist)
2090 for (i = 0; i < br->b->nsamples; i++)
2092 if (br->b->r[i].use && br->b->s[i]->hist)
2100 /* calculate histogram-specific errors */
2103 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2104 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2106 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2108 /* the histogram range error is the biggest of the differences
2109 between the best estimate and the extremes */
2110 br->dg_histrange_err = fabs(dg_max - dg_min);
2112 br->dg_disc_err = 0.;
2113 for (i = 0; i < br->a->nsamples; i++)
2115 if (br->a->s[i]->hist)
2117 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2120 for (i = 0; i < br->b->nsamples; i++)
2122 if (br->b->s[i]->hist)
2124 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2128 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2130 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2139 sample_coll_t ca, cb;
2141 /* initialize the samples */
2142 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2144 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2147 for (npee = npee_min; npee <= npee_max; npee++)
2156 double dstddev2 = 0;
2159 for (p = 0; p < npee; p++)
2166 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2167 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2171 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2175 sample_coll_destroy(&ca);
2179 sample_coll_destroy(&cb);
2184 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2188 partsum[npee*(npee_max+1)+p] += dgp;
2190 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2195 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2198 dstddev2 += stddevc*stddevc;
2200 sample_coll_destroy(&ca);
2201 sample_coll_destroy(&cb);
2205 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2211 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2212 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2216 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2218 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
2219 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
2220 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
2221 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2226 static double bar_err(int nbmin, int nbmax, const double *partsum)
2229 double svar, s, s2, dg;
2232 for (nb = nbmin; nb <= nbmax; nb++)
2236 for (b = 0; b < nb; b++)
2238 dg = partsum[nb*(nbmax+1)+b];
2244 svar += (s2 - s*s)/(nb - 1);
2247 return sqrt(svar/(nbmax + 1 - nbmin));
2251 /* Seek the end of an identifier (consecutive non-spaces), followed by
2252 an optional number of spaces or '='-signs. Returns a pointer to the
2253 first non-space value found after that. Returns NULL if the string
2256 static const char *find_value(const char *str)
2258 gmx_bool name_end_found = FALSE;
2260 /* if the string is a NULL pointer, return a NULL pointer. */
2265 while (*str != '\0')
2267 /* first find the end of the name */
2268 if (!name_end_found)
2270 if (isspace(*str) || (*str == '=') )
2272 name_end_found = TRUE;
2277 if (!( isspace(*str) || (*str == '=') ))
2289 /* read a vector-notation description of a lambda vector */
2290 static gmx_bool read_lambda_compvec(const char *str,
2292 const lambda_components_t *lc_in,
2293 lambda_components_t *lc_out,
2297 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2298 components, or to check them */
2299 gmx_bool start_reached = FALSE; /* whether the start of component names
2301 gmx_bool vector = FALSE; /* whether there are multiple components */
2302 int n = 0; /* current component number */
2303 const char *val_start = NULL; /* start of the component name, or NULL
2304 if not in a value */
2314 if (lc_out && lc_out->N == 0)
2316 initialize_lc = TRUE;
2331 start_reached = TRUE;
2334 else if (*str == '(')
2337 start_reached = TRUE;
2339 else if (!isspace(*str))
2341 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2348 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2355 lambda_components_add(lc_out, val_start,
2360 if (!lambda_components_check(lc_out, n, val_start,
2369 /* add a vector component to lv */
2370 lv->val[n] = strtod(val_start, &strtod_end);
2371 if (val_start == strtod_end)
2374 "Error reading lambda vector in %s", fn);
2377 /* reset for the next identifier */
2386 else if (isalnum(*str))
2399 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2407 else if (lv == NULL)
2413 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2433 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2439 /* read and check the component names from a string */
2440 static gmx_bool read_lambda_components(const char *str,
2441 lambda_components_t *lc,
2445 return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2448 /* read an initialized lambda vector from a string */
2449 static gmx_bool read_lambda_vector(const char *str,
2454 return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2459 /* deduce lambda value from legend.
2461 legend = the legend string
2463 lam = the initialized lambda vector
2464 returns whether to use the data in this set.
2466 static gmx_bool legend2lambda(const char *fn,
2471 const char *ptr = NULL, *ptr2 = NULL;
2472 gmx_bool ok = FALSE;
2473 gmx_bool bdhdl = FALSE;
2474 const char *tostr = " to ";
2478 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2481 /* look for the last 'to': */
2485 ptr2 = strstr(ptr2, tostr);
2492 while (ptr2 != NULL && *ptr2 != '\0');
2496 ptr += strlen(tostr)-1; /* and advance past that 'to' */
2500 /* look for the = sign */
2501 ptr = strrchr(legend, '=');
2504 /* otherwise look for the last space */
2505 ptr = strrchr(legend, ' ');
2509 if (strstr(legend, "dH"))
2514 else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2519 else /*if (strstr(legend, "pV"))*/
2530 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2534 ptr = find_value(ptr);
2535 if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2537 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2546 ptr = strrchr(legend, '=');
2550 /* there must be a component name */
2554 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2556 /* now backtrack to the start of the identifier */
2557 while (isspace(*ptr))
2563 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2566 while (!isspace(*ptr))
2571 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2575 strncpy(buf, ptr, (end-ptr));
2576 buf[(end-ptr)] = '\0';
2577 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2581 strncpy(buf, ptr, (end-ptr));
2582 buf[(end-ptr)] = '\0';
2584 "Did not find lambda component for '%s' in %s",
2593 "dhdl without component name with >1 lambda component in %s",
2598 lam->dhdl = dhdl_index;
2603 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2604 lambda_components_t *lc)
2609 double native_lambda;
2613 /* first check for a state string */
2614 ptr = strstr(subtitle, "state");
2618 const char *val_end;
2620 /* the new 4.6 style lambda vectors */
2621 ptr = find_value(ptr);
2624 index = strtol(ptr, &end, 10);
2627 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2634 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2637 /* now find the lambda vector component names */
2638 while (*ptr != '(' && !isalnum(*ptr))
2644 "Incomplete lambda vector component data in %s", fn);
2649 if (!read_lambda_components(ptr, lc, &val_end, fn))
2652 "lambda vector components in %s don't match those previously read",
2655 ptr = find_value(val_end);
2658 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2661 lambda_vec_init(&(ba->native_lambda), lc);
2662 if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2664 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2666 ba->native_lambda.index = index;
2671 /* compatibility mode: check for lambda in other ways. */
2672 /* plain text lambda string */
2673 ptr = strstr(subtitle, "lambda");
2676 /* xmgrace formatted lambda string */
2677 ptr = strstr(subtitle, "\\xl\\f{}");
2681 /* xmgr formatted lambda string */
2682 ptr = strstr(subtitle, "\\8l\\4");
2686 ptr = strstr(ptr, "=");
2690 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2691 /* add the lambda component name as an empty string */
2694 if (!lambda_components_check(lc, 0, "", 0))
2697 "lambda vector components in %s don't match those previously read",
2703 lambda_components_add(lc, "", 0);
2705 lambda_vec_init(&(ba->native_lambda), lc);
2706 ba->native_lambda.val[0] = native_lambda;
2713 static void filename2lambda(const char *fn)
2716 const char *ptr, *digitptr;
2720 /* go to the end of the path string and search backward to find the last
2721 directory in the path which has to contain the value of lambda
2723 while (ptr[1] != '\0')
2727 /* searching backward to find the second directory separator */
2732 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2740 /* save the last position of a digit between the last two
2741 separators = in the last dirname */
2742 if (dirsep > 0 && isdigit(*ptr))
2750 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2751 " last directory in the path '%s' does not contain a number", fn);
2753 if (digitptr[-1] == '-')
2757 lambda = strtod(digitptr, &endptr);
2758 if (endptr == digitptr)
2760 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2764 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2765 lambda_components_t *lc)
2768 char *subtitle, **legend, *ptr;
2770 gmx_bool native_lambda_read = FALSE;
2778 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2781 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2783 /* Reorder the data */
2785 for (i = 1; i < ba->nset; i++)
2787 ba->y[i-1] = ba->y[i];
2791 snew(ba->np, ba->nset);
2792 for (i = 0; i < ba->nset; i++)
2798 if (subtitle != NULL)
2800 /* try to extract temperature */
2801 ptr = strstr(subtitle, "T =");
2805 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2809 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2819 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2824 /* Try to deduce lambda from the subtitle */
2827 if (subtitle2lambda(subtitle, ba, fn, lc))
2829 native_lambda_read = TRUE;
2832 snew(ba->lambda, ba->nset);
2835 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2838 if (!native_lambda_read)
2840 /* Deduce lambda from the file name */
2841 filename2lambda(fn);
2842 native_lambda_read = TRUE;
2844 ba->lambda[0] = ba->native_lambda;
2848 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2853 for (i = 0; i < ba->nset; )
2855 gmx_bool use = FALSE;
2856 /* Read lambda from the legend */
2857 lambda_vec_init( &(ba->lambda[i]), lc );
2858 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2859 use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2862 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2868 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2869 for (j = i+1; j < ba->nset; j++)
2871 ba->y[j-1] = ba->y[j];
2872 legend[j-1] = legend[j];
2879 if (!native_lambda_read)
2881 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2886 for (i = 0; i < ba->nset-1; i++)
2894 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2903 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2905 if (barsim->nset < 1)
2907 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2910 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2912 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2914 *temp = barsim->temp;
2916 /* now create a series of samples_t */
2917 snew(s, barsim->nset);
2918 for (i = 0; i < barsim->nset; i++)
2920 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2921 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2922 &(barsim->lambda[i])),
2924 s[i].du = barsim->y[i];
2925 s[i].ndu = barsim->np[i];
2928 lambda_data_list_insert_sample(sd->lb, s+i);
2933 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2934 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2935 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2936 for (i = 0; i < barsim->nset; i++)
2938 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2939 printf(" %s (%d pts)\n", buf, s[i].ndu);
2945 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2946 double start_time, double delta_time,
2947 lambda_vec_t *native_lambda, double temp,
2948 double *last_t, const char *filename)
2952 double old_foreign_lambda;
2953 lambda_vec_t *foreign_lambda;
2955 samples_t *s; /* convenience pointer */
2958 /* check the block types etc. */
2959 if ( (blk->nsub < 3) ||
2960 (blk->sub[0].type != xdr_datatype_int) ||
2961 (blk->sub[1].type != xdr_datatype_double) ||
2963 (blk->sub[2].type != xdr_datatype_float) &&
2964 (blk->sub[2].type != xdr_datatype_double)
2966 (blk->sub[0].nr < 1) ||
2967 (blk->sub[1].nr < 1) )
2970 "Unexpected/corrupted block data in file %s around time %f.",
2971 filename, start_time);
2974 snew(foreign_lambda, 1);
2975 lambda_vec_init(foreign_lambda, native_lambda->lc);
2976 lambda_vec_copy(foreign_lambda, native_lambda);
2977 type = blk->sub[0].ival[0];
2980 for (i = 0; i < native_lambda->lc->N; i++)
2982 foreign_lambda->val[i] = blk->sub[1].dval[i];
2987 if (blk->sub[0].nr > 1)
2989 foreign_lambda->dhdl = blk->sub[0].ival[1];
2993 foreign_lambda->dhdl = 0;
2999 /* initialize the samples structure if it's empty. */
3001 samples_init(*smp, native_lambda, foreign_lambda, temp,
3002 type == dhbtDHDL, filename);
3003 (*smp)->start_time = start_time;
3004 (*smp)->delta_time = delta_time;
3007 /* set convenience pointer */
3010 /* now double check */
3011 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
3013 char buf[STRLEN], buf2[STRLEN];
3014 lambda_vec_print(foreign_lambda, buf, FALSE);
3015 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
3016 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
3017 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
3018 filename, start_time);
3021 /* make room for the data */
3022 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3024 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3025 blk->sub[2].nr*2 : s->ndu_alloc;
3026 srenew(s->du_alloc, s->ndu_alloc);
3027 s->du = s->du_alloc;
3030 s->ndu += blk->sub[2].nr;
3031 s->ntot += blk->sub[2].nr;
3032 *ndu = blk->sub[2].nr;
3034 /* and copy the data*/
3035 for (j = 0; j < blk->sub[2].nr; j++)
3037 if (blk->sub[2].type == xdr_datatype_float)
3039 s->du[startj+j] = blk->sub[2].fval[j];
3043 s->du[startj+j] = blk->sub[2].dval[j];
3046 if (start_time + blk->sub[2].nr*delta_time > *last_t)
3048 *last_t = start_time + blk->sub[2].nr*delta_time;
3052 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3053 double start_time, double delta_time,
3054 lambda_vec_t *native_lambda, double temp,
3055 double *last_t, const char *filename)
3060 double old_foreign_lambda;
3061 lambda_vec_t *foreign_lambda;
3065 /* check the block types etc. */
3066 if ( (blk->nsub < 2) ||
3067 (blk->sub[0].type != xdr_datatype_double) ||
3068 (blk->sub[1].type != xdr_datatype_int64) ||
3069 (blk->sub[0].nr < 2) ||
3070 (blk->sub[1].nr < 2) )
3073 "Unexpected/corrupted block data in file %s around time %f",
3074 filename, start_time);
3077 nhist = blk->nsub-2;
3085 "Unexpected/corrupted block data in file %s around time %f",
3086 filename, start_time);
3092 snew(foreign_lambda, 1);
3093 lambda_vec_init(foreign_lambda, native_lambda->lc);
3094 lambda_vec_copy(foreign_lambda, native_lambda);
3095 type = (int)(blk->sub[1].lval[1]);
3098 double old_foreign_lambda;
3100 old_foreign_lambda = blk->sub[0].dval[0];
3101 if (old_foreign_lambda >= 0)
3103 foreign_lambda->val[0] = old_foreign_lambda;
3104 if (foreign_lambda->lc->N > 1)
3107 "Single-component lambda in multi-component file %s",
3113 for (i = 0; i < native_lambda->lc->N; i++)
3115 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3121 if (foreign_lambda->lc->N > 1)
3123 if (blk->sub[1].nr < 3 + nhist)
3126 "Missing derivative coord in multi-component file %s",
3129 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3133 foreign_lambda->dhdl = 0;
3137 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3141 for (i = 0; i < nhist; i++)
3143 nbins[i] = blk->sub[i+2].nr;
3146 hist_init(s->hist, nhist, nbins);
3148 for (i = 0; i < nhist; i++)
3150 s->hist->x0[i] = blk->sub[1].lval[2+i];
3151 s->hist->dx[i] = blk->sub[0].dval[1];
3154 s->hist->dx[i] = -s->hist->dx[i];
3158 s->hist->start_time = start_time;
3159 s->hist->delta_time = delta_time;
3160 s->start_time = start_time;
3161 s->delta_time = delta_time;
3163 for (i = 0; i < nhist; i++)
3166 gmx_int64_t sum = 0;
3168 for (j = 0; j < s->hist->nbin[i]; j++)
3170 int binv = (int)(blk->sub[i+2].ival[j]);
3172 s->hist->bin[i][j] = binv;
3185 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3191 if (start_time + s->hist->sum*delta_time > *last_t)
3193 *last_t = start_time + s->hist->sum*delta_time;
3199 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3205 gmx_enxnm_t *enm = NULL;
3206 double first_t = -1;
3208 samples_t **samples_rawdh = NULL; /* contains samples for raw delta_h */
3209 int *nhists = NULL; /* array to keep count & print at end */
3210 int *npts = NULL; /* array to keep count & print at end */
3211 lambda_vec_t **lambdas = NULL; /* array to keep count & print at end */
3212 lambda_vec_t *native_lambda;
3213 double end_time; /* the end time of the last batch of samples */
3215 lambda_vec_t start_lambda;
3217 fp = open_enx(fn, "r");
3218 do_enxnms(fp, &nre, &enm);
3221 snew(native_lambda, 1);
3222 start_lambda.lc = NULL;
3224 while (do_enx(fp, fr))
3226 /* count the data blocks */
3227 int nblocks_raw = 0;
3228 int nblocks_hist = 0;
3231 /* DHCOLL block information: */
3232 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3235 /* count the blocks and handle collection information: */
3236 for (i = 0; i < fr->nblock; i++)
3238 if (fr->block[i].id == enxDHHIST)
3242 if (fr->block[i].id == enxDH)
3246 if (fr->block[i].id == enxDHCOLL)
3249 if ( (fr->block[i].nsub < 1) ||
3250 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3251 (fr->block[i].sub[0].nr < 5))
3253 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3256 /* read the data from the DHCOLL block */
3257 rtemp = fr->block[i].sub[0].dval[0];
3258 start_time = fr->block[i].sub[0].dval[1];
3259 delta_time = fr->block[i].sub[0].dval[2];
3260 old_start_lambda = fr->block[i].sub[0].dval[3];
3261 delta_lambda = fr->block[i].sub[0].dval[4];
3263 if (delta_lambda > 0)
3265 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3267 if ( ( *temp != rtemp) && (*temp > 0) )
3269 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3273 if (old_start_lambda >= 0)
3277 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3280 "lambda vector components in %s don't match those previously read",
3286 lambda_components_add(&(sd->lc), "", 0);
3288 if (!start_lambda.lc)
3290 lambda_vec_init(&start_lambda, &(sd->lc));
3292 start_lambda.val[0] = old_start_lambda;
3296 /* read lambda vector */
3298 gmx_bool check = (sd->lc.N > 0);
3299 if (fr->block[i].nsub < 2)
3302 "No lambda vector, but start_lambda=%f\n",
3305 n_lambda_vec = fr->block[i].sub[1].ival[1];
3306 for (j = 0; j < n_lambda_vec; j++)
3309 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3312 /* check the components */
3313 lambda_components_check(&(sd->lc), j, name,
3318 lambda_components_add(&(sd->lc), name,
3322 lambda_vec_init(&start_lambda, &(sd->lc));
3323 start_lambda.index = fr->block[i].sub[1].ival[0];
3324 for (j = 0; j < n_lambda_vec; j++)
3326 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3331 first_t = start_time;
3338 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3340 if (nblocks_raw > 0 && nblocks_hist > 0)
3342 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3347 /* check the native lambda */
3348 if (!lambda_vec_same(&start_lambda, native_lambda) )
3350 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3351 fn, native_lambda, start_lambda, start_time);
3353 /* check the number of samples against the previous number */
3354 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3356 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3357 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3359 /* check whether last iterations's end time matches with
3360 the currrent start time */
3361 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3363 /* it didn't. We need to store our samples and reallocate */
3364 for (i = 0; i < nsamples; i++)
3366 if (samples_rawdh[i])
3368 /* insert it into the existing list */
3369 lambda_data_list_insert_sample(sd->lb,
3371 /* and make sure we'll allocate a new one this time
3373 samples_rawdh[i] = NULL;
3380 /* this is the first round; allocate the associated data
3382 /*native_lambda=start_lambda;*/
3383 lambda_vec_init(native_lambda, &(sd->lc));
3384 lambda_vec_copy(native_lambda, &start_lambda);
3385 nsamples = nblocks_raw+nblocks_hist;
3386 snew(nhists, nsamples);
3387 snew(npts, nsamples);
3388 snew(lambdas, nsamples);
3389 snew(samples_rawdh, nsamples);
3390 for (i = 0; i < nsamples; i++)
3395 samples_rawdh[i] = NULL; /* init to NULL so we know which
3396 ones contain values */
3401 k = 0; /* counter for the lambdas, etc. arrays */
3402 for (i = 0; i < fr->nblock; i++)
3404 if (fr->block[i].id == enxDH)
3406 int type = (fr->block[i].sub[0].ival[0]);
3407 if (type == dhbtDH || type == dhbtDHDL)
3410 read_edr_rawdh_block(&(samples_rawdh[k]),
3413 start_time, delta_time,
3414 native_lambda, rtemp,
3417 if (samples_rawdh[k])
3419 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3424 else if (fr->block[i].id == enxDHHIST)
3426 int type = (int)(fr->block[i].sub[1].lval[1]);
3427 if (type == dhbtDH || type == dhbtDHDL)
3431 samples_t *s; /* this is where the data will go */
3432 s = read_edr_hist_block(&nb, &(fr->block[i]),
3433 start_time, delta_time,
3434 native_lambda, rtemp,
3439 lambdas[k] = s->foreign_lambda;
3442 /* and insert the new sample immediately */
3443 for (j = 0; j < nb; j++)
3445 lambda_data_list_insert_sample(sd->lb, s+j);
3451 /* Now store all our extant sample collections */
3452 for (i = 0; i < nsamples; i++)
3454 if (samples_rawdh[i])
3456 /* insert it into the existing list */
3457 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3465 lambda_vec_print(native_lambda, buf, FALSE);
3466 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3467 fn, first_t, last_t, buf);
3468 for (i = 0; i < nsamples; i++)
3472 lambda_vec_print(lambdas[i], buf, TRUE);
3475 printf(" %s (%d hists)\n", buf, nhists[i]);
3479 printf(" %s (%d pts)\n", buf, npts[i]);
3491 int gmx_bar(int argc, char *argv[])
3493 static const char *desc[] = {
3494 "[THISMODULE] calculates free energy difference estimates through ",
3495 "Bennett's acceptance ratio method (BAR). It also automatically",
3496 "adds series of individual free energies obtained with BAR into",
3497 "a combined free energy estimate.[PAR]",
3499 "Every individual BAR free energy difference relies on two ",
3500 "simulations at different states: say state A and state B, as",
3501 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3502 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3503 "average of the Hamiltonian difference of state B given state A and",
3505 "The energy differences to the other state must be calculated",
3506 "explicitly during the simulation. This can be done with",
3507 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3509 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3510 "Two types of input files are supported:[BR]",
3511 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
3512 "The files should have columns ",
3513 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3514 "The [GRK]lambda[grk] values are inferred ",
3515 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3516 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3517 "legends of Delta H",
3519 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
3520 "[TT]-extp[tt] option for these files, it is assumed",
3521 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3522 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3523 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3524 "subtitle (if present), otherwise from a number in the subdirectory ",
3525 "in the file name.[PAR]",
3527 "The [GRK]lambda[grk] of the simulation is parsed from ",
3528 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3529 "foreign [GRK]lambda[grk] values from the legend containing the ",
3530 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3531 "the legend line containing 'T ='.[PAR]",
3533 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3534 "These can contain either lists of energy differences (see the ",
3535 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3536 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3537 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3538 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3540 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3541 "the energy difference can also be extrapolated from the ",
3542 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3543 "option, which assumes that the system's Hamiltonian depends linearly",
3544 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3546 "The free energy estimates are determined using BAR with bisection, ",
3547 "with the precision of the output set with [TT]-prec[tt]. ",
3548 "An error estimate taking into account time correlations ",
3549 "is made by splitting the data into blocks and determining ",
3550 "the free energy differences over those blocks and assuming ",
3551 "the blocks are independent. ",
3552 "The final error estimate is determined from the average variance ",
3553 "over 5 blocks. A range of block numbers for error estimation can ",
3554 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3556 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3557 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3558 "samples. [BB]Note[bb] that when aggregating energy ",
3559 "differences/derivatives with different sampling intervals, this is ",
3560 "almost certainly not correct. Usually subsequent energies are ",
3561 "correlated and different time intervals mean different degrees ",
3562 "of correlation between samples.[PAR]",
3564 "The results are split in two parts: the last part contains the final ",
3565 "results in kJ/mol, together with the error estimate for each part ",
3566 "and the total. The first part contains detailed free energy ",
3567 "difference estimates and phase space overlap measures in units of ",
3568 "kT (together with their computed error estimate). The printed ",
3570 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3571 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3572 "[TT]*[tt] DG: the free energy estimate.[BR]",
3573 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
3574 "[TT]*[tt] s_B: an estimate of the relative entropy of A in B.[BR]",
3575 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
3577 "The relative entropy of both states in each other's ensemble can be ",
3578 "interpreted as a measure of phase space overlap: ",
3579 "the relative entropy s_A of the work samples of lambda_B in the ",
3580 "ensemble of lambda_A (and vice versa for s_B), is a ",
3581 "measure of the 'distance' between Boltzmann distributions of ",
3582 "the two states, that goes to zero for identical distributions. See ",
3583 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3585 "The estimate of the expected per-sample standard deviation, as given ",
3586 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3587 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3588 "of the actual statistical error, because it assumes independent samples).[PAR]",
3590 "To get a visual estimate of the phase space overlap, use the ",
3591 "[TT]-oh[tt] option to write series of histograms, together with the ",
3592 "[TT]-nbin[tt] option.[PAR]"
3594 static real begin = 0, end = -1, temp = -1;
3595 int nd = 2, nbmin = 5, nbmax = 5;
3597 gmx_bool use_dhdl = FALSE;
3598 gmx_bool calc_s, calc_v;
3600 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3601 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3602 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3603 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3604 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3605 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3606 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3607 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3611 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3612 { efEDR, "-g", "ener", ffOPTRDMULT },
3613 { efXVG, "-o", "bar", ffOPTWR },
3614 { efXVG, "-oi", "barint", ffOPTWR },
3615 { efXVG, "-oh", "histogram", ffOPTWR }
3617 #define NFILE asize(fnm)
3620 int nf = 0; /* file counter */
3622 int nfile_tot; /* total number of input files */
3627 sim_data_t sim_data; /* the simulation data */
3628 barres_t *results; /* the results */
3629 int nresults; /* number of results in results array */
3632 double prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3634 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3635 char buf[STRLEN], buf2[STRLEN];
3636 char ktformat[STRLEN], sktformat[STRLEN];
3637 char kteformat[STRLEN], skteformat[STRLEN];
3640 gmx_bool result_OK = TRUE, bEE = TRUE;
3642 gmx_bool disc_err = FALSE;
3643 double sum_disc_err = 0.; /* discretization error */
3644 gmx_bool histrange_err = FALSE;
3645 double sum_histrange_err = 0.; /* histogram range error */
3646 double stat_err = 0.; /* statistical error */
3648 if (!parse_common_args(&argc, argv,
3650 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3655 if (opt2bSet("-f", NFILE, fnm))
3657 nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3659 if (opt2bSet("-g", NFILE, fnm))
3661 nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3664 sim_data_init(&sim_data);
3666 /* make linked list */
3668 lambda_data_init(lb, 0, 0);
3674 nfile_tot = nxvgfile + nedrfile;
3678 gmx_fatal(FARGS, "No input files!");
3683 gmx_fatal(FARGS, "Can not have negative number of digits");
3685 prec = pow(10, -nd);
3687 snew(partsum, (nbmax+1)*(nbmax+1));
3690 /* read in all files. First xvg files */
3691 for (f = 0; f < nxvgfile; f++)
3693 read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3696 /* then .edr files */
3697 for (f = 0; f < nedrfile; f++)
3699 read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3703 /* fix the times to allow for equilibration */
3704 sim_data_impose_times(&sim_data, begin, end);
3706 if (opt2bSet("-oh", NFILE, fnm))
3708 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3711 /* assemble the output structures from the lambdas */
3712 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3714 sum_disc_err = barres_list_max_disc_err(results, nresults);
3718 printf("\nNo results to calculate.\n");
3722 if (sum_disc_err > prec)
3724 prec = sum_disc_err;
3725 nd = ceil(-log10(prec));
3726 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3730 /*sprintf(lamformat,"%%6.3f");*/
3731 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3732 /* the format strings of the results in kT */
3733 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3734 sprintf( sktformat, "%%%ds", 6+nd);
3735 /* the format strings of the errors in kT */
3736 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3737 sprintf( skteformat, "%%%ds", 4+nd);
3738 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3739 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3744 if (opt2bSet("-o", NFILE, fnm))
3746 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3747 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3748 "\\lambda", buf, exvggtXYDY, oenv);
3752 if (opt2bSet("-oi", NFILE, fnm))
3754 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3755 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3756 "\\lambda", buf, oenv);
3766 /* first calculate results */
3769 for (f = 0; f < nresults; f++)
3771 /* Determine the free energy difference with a factor of 10
3772 * more accuracy than requested for printing.
3774 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3777 if (results[f].dg_disc_err > prec/10.)
3781 if (results[f].dg_histrange_err > prec/10.)
3783 histrange_err = TRUE;
3787 /* print results in kT */
3791 printf("\nTemperature: %g K\n", temp);
3793 printf("\nDetailed results in kT (see help for explanation):\n\n");
3794 printf("%6s ", " lam_A");
3795 printf("%6s ", " lam_B");
3796 printf(sktformat, "DG ");
3799 printf(skteformat, "+/- ");
3803 printf(skteformat, "disc ");
3807 printf(skteformat, "range ");
3809 printf(sktformat, "s_A ");
3812 printf(skteformat, "+/- " );
3814 printf(sktformat, "s_B ");
3817 printf(skteformat, "+/- " );
3819 printf(sktformat, "stdev ");
3822 printf(skteformat, "+/- ");
3825 for (f = 0; f < nresults; f++)
3827 lambda_vec_print_short(results[f].a->native_lambda, buf);
3829 lambda_vec_print_short(results[f].b->native_lambda, buf);
3831 printf(ktformat, results[f].dg);
3835 printf(kteformat, results[f].dg_err);
3840 printf(kteformat, results[f].dg_disc_err);
3845 printf(kteformat, results[f].dg_histrange_err);
3848 printf(ktformat, results[f].sa);
3852 printf(kteformat, results[f].sa_err);
3855 printf(ktformat, results[f].sb);
3859 printf(kteformat, results[f].sb_err);
3862 printf(ktformat, results[f].dg_stddev);
3866 printf(kteformat, results[f].dg_stddev_err);
3870 /* Check for negative relative entropy with a 95% certainty. */
3871 if (results[f].sa < -2*results[f].sa_err ||
3872 results[f].sb < -2*results[f].sb_err)
3880 printf("\nWARNING: Some of these results violate the Second Law of "
3881 "Thermodynamics: \n"
3882 " This is can be the result of severe undersampling, or "
3884 " there is something wrong with the simulations.\n");
3888 /* final results in kJ/mol */
3889 printf("\n\nFinal results in kJ/mol:\n\n");
3891 for (f = 0; f < nresults; f++)
3896 lambda_vec_print_short(results[f].a->native_lambda, buf);
3897 fprintf(fpi, xvg2format, buf, dg_tot);
3903 lambda_vec_print_intermediate(results[f].a->native_lambda,
3904 results[f].b->native_lambda,
3907 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3911 lambda_vec_print_short(results[f].a->native_lambda, buf);
3912 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3913 printf("%s - %s", buf, buf2);
3916 printf(dgformat, results[f].dg*kT);
3920 printf(dgformat, results[f].dg_err*kT);
3924 printf(" (max. range err. = ");
3925 printf(dgformat, results[f].dg_histrange_err*kT);
3927 sum_histrange_err += results[f].dg_histrange_err*kT;
3931 dg_tot += results[f].dg;
3935 lambda_vec_print_short(results[0].a->native_lambda, buf);
3936 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3937 printf("%s - %s", buf, buf2);
3940 printf(dgformat, dg_tot*kT);
3943 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3945 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3950 printf("\nmaximum discretization error = ");
3951 printf(dgformat, sum_disc_err);
3952 if (bEE && stat_err < sum_disc_err)
3954 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3959 printf("\nmaximum histogram range error = ");
3960 printf(dgformat, sum_histrange_err);
3961 if (bEE && stat_err < sum_histrange_err)
3963 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3972 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3973 fprintf(fpi, xvg2format, buf, dg_tot);
3977 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3978 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");