2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2010,2011,2012,2013,2014,2015, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
43 #include "gromacs/commandline/pargs.h"
44 #include "gromacs/fileio/enxio.h"
45 #include "gromacs/fileio/xvgr.h"
46 #include "gromacs/gmxana/gmx_ana.h"
47 #include "gromacs/legacyheaders/macros.h"
48 #include "gromacs/legacyheaders/mdebin.h"
49 #include "gromacs/legacyheaders/names.h"
50 #include "gromacs/legacyheaders/typedefs.h"
51 #include "gromacs/legacyheaders/viewit.h"
52 #include "gromacs/math/units.h"
53 #include "gromacs/math/utilities.h"
54 #include "gromacs/utility/cstringutil.h"
55 #include "gromacs/utility/dir_separator.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/smalloc.h"
58 #include "gromacs/utility/snprintf.h"
61 /* Structure for the names of lambda vector components */
62 typedef struct lambda_components_t
64 char **names; /* Array of strings with names for the lambda vector
66 int N; /* The number of components */
67 int Nalloc; /* The number of allocated components */
68 } lambda_components_t;
70 /* Structure for a lambda vector or a dhdl derivative direction */
71 typedef struct lambda_vec_t
73 double *val; /* The lambda vector component values. Only valid if
75 int dhdl; /* The coordinate index for the derivative described by this
77 const lambda_components_t *lc; /* the associated lambda_components
79 int index; /* The state number (init-lambda-state) of this lambda
80 vector, if known. If not, it is set to -1 */
83 /* the dhdl.xvg data from a simulation */
87 int ftp; /* file type */
88 int nset; /* number of lambdas, including dhdl */
89 int *np; /* number of data points (du or hists) per lambda */
90 int np_alloc; /* number of points (du or hists) allocated */
91 double temp; /* temperature */
92 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
93 double *t; /* the times (of second index for y) */
94 double **y; /* the dU values. y[0] holds the derivative, while
95 further ones contain the energy differences between
96 the native lambda and the 'foreign' lambdas. */
97 lambda_vec_t native_lambda; /* the native lambda */
99 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
103 typedef struct hist_t
105 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
106 double dx[2]; /* the histogram spacing. The reverse
107 dx is the negative of the forward dx.*/
108 gmx_int64_t x0[2]; /* the (forward + reverse) histogram start
111 int nbin[2]; /* the (forward+reverse) number of bins */
112 gmx_int64_t sum; /* the total number of counts. Must be
113 the same for forward + reverse. */
114 int nhist; /* number of hist datas (forward or reverse) */
116 double start_time, delta_time; /* start time, end time of histogram */
120 /* an aggregate of samples for partial free energy calculation */
121 typedef struct samples_t
123 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
124 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
125 double temp; /* the temperature */
126 gmx_bool derivative; /* whether this sample is a derivative */
128 /* The samples come either as either delta U lists: */
129 int ndu; /* the number of delta U samples */
130 double *du; /* the delta u's */
131 double *t; /* the times associated with those samples, or: */
132 double start_time, delta_time; /*start time and delta time for linear time*/
134 /* or as histograms: */
135 hist_t *hist; /* a histogram */
137 /* allocation data: (not NULL for data 'owned' by this struct) */
138 double *du_alloc, *t_alloc; /* allocated delta u arrays */
139 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
140 hist_t *hist_alloc; /* allocated hist */
142 gmx_int64_t ntot; /* total number of samples */
143 const char *filename; /* the file name this sample comes from */
146 /* a sample range (start to end for du-style data, or boolean
147 for both du-style data and histograms */
148 typedef struct sample_range_t
150 int start, end; /* start and end index for du style data */
151 gmx_bool use; /* whether to use this sample */
153 samples_t *s; /* the samples this range belongs to */
157 /* a collection of samples for a partial free energy calculation
158 (i.e. the collection of samples from one native lambda to one
160 typedef struct sample_coll_t
162 lambda_vec_t *native_lambda; /* these should be the same for all samples
164 lambda_vec_t *foreign_lambda; /* collection */
165 double temp; /* the temperature */
167 int nsamples; /* the number of samples */
168 samples_t **s; /* the samples themselves */
169 sample_range_t *r; /* the sample ranges */
170 int nsamples_alloc; /* number of allocated samples */
172 gmx_int64_t ntot; /* total number of samples in the ranges of
175 struct sample_coll_t *next, *prev; /* next and previous in the list */
178 /* all the samples associated with a lambda point */
179 typedef struct lambda_data_t
181 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
182 double temp; /* temperature */
184 sample_coll_t *sc; /* the samples */
186 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
188 struct lambda_data_t *next, *prev; /* the next and prev in the list */
191 /* Top-level data structure of simulation data */
192 typedef struct sim_data_t
194 lambda_data_t *lb; /* a lambda data linked list */
195 lambda_data_t lb_head; /* The head element of the linked list */
197 lambda_components_t lc; /* the allowed components of the lambda
201 /* Top-level data structure with calculated values. */
203 sample_coll_t *a, *b; /* the simulation data */
205 double dg; /* the free energy difference */
206 double dg_err; /* the free energy difference */
208 double dg_disc_err; /* discretization error */
209 double dg_histrange_err; /* histogram range error */
211 double sa; /* relative entropy of b in state a */
212 double sa_err; /* error in sa */
213 double sb; /* relative entropy of a in state b */
214 double sb_err; /* error in sb */
216 double dg_stddev; /* expected dg stddev per sample */
217 double dg_stddev_err; /* error in dg_stddev */
221 /* Initialize a lambda_components structure */
222 static void lambda_components_init(lambda_components_t *lc)
226 snew(lc->names, lc->Nalloc);
229 /* Add a component to a lambda_components structure */
230 static void lambda_components_add(lambda_components_t *lc,
231 const char *name, size_t name_length)
233 while (lc->N + 1 > lc->Nalloc)
235 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
236 srenew(lc->names, lc->Nalloc);
238 snew(lc->names[lc->N], name_length+1);
239 strncpy(lc->names[lc->N], name, name_length);
243 /* check whether a component with index 'index' matches the given name, or
244 is also NULL. Returns TRUE if this is the case.
245 the string name does not need to end */
246 static gmx_bool lambda_components_check(const lambda_components_t *lc,
256 if (name == NULL && lc->names[index] == NULL)
260 if ((name == NULL) != (lc->names[index] == NULL))
264 len = strlen(lc->names[index]);
265 if (len != name_length)
269 if (strncmp(lc->names[index], name, name_length) == 0)
276 /* Find the index of a given lambda component name, or -1 if not found */
277 static int lambda_components_find(const lambda_components_t *lc,
283 for (i = 0; i < lc->N; i++)
285 if (strncmp(lc->names[i], name, name_length) == 0)
295 /* initialize a lambda vector */
296 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
298 snew(lv->val, lc->N);
304 static void lambda_vec_destroy(lambda_vec_t *lv)
309 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
313 lambda_vec_init(lv, orig->lc);
314 lv->dhdl = orig->dhdl;
315 lv->index = orig->index;
316 for (i = 0; i < lv->lc->N; i++)
318 lv->val[i] = orig->val[i];
322 /* write a lambda vec to a preallocated string */
323 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
328 str[0] = 0; /* reset the string */
333 str += sprintf(str, "delta H to ");
337 str += sprintf(str, "(");
339 for (i = 0; i < lv->lc->N; i++)
341 str += sprintf(str, "%g", lv->val[i]);
344 str += sprintf(str, ", ");
349 str += sprintf(str, ")");
354 /* this lambda vector describes a derivative */
355 str += sprintf(str, "dH/dl");
356 if (strlen(lv->lc->names[lv->dhdl]) > 0)
358 sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
363 /* write a shortened version of the lambda vec to a preallocated string */
364 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
371 sprintf(str, "%6d", lv->index);
377 sprintf(str, "%6.3f", lv->val[0]);
381 sprintf(str, "dH/dl[%d]", lv->dhdl);
386 /* write an intermediate version of two lambda vecs to a preallocated string */
387 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
388 const lambda_vec_t *b, char *str)
394 if ( (a->index >= 0) && (b->index >= 0) )
396 sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
400 if ( (a->dhdl < 0) && (b->dhdl < 0) )
402 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
409 /* calculate the difference in lambda vectors: c = a-b.
410 c must be initialized already, and a and b must describe non-derivative
412 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
417 if ( (a->dhdl > 0) || (b->dhdl > 0) )
420 "Trying to calculate the difference between derivatives instead of lambda points");
422 if ((a->lc != b->lc) || (a->lc != c->lc) )
425 "Trying to calculate the difference lambdas with differing basis set");
427 for (i = 0; i < a->lc->N; i++)
429 c->val[i] = a->val[i] - b->val[i];
433 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
434 a and b must describe non-derivative lambda points */
435 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
440 if ( (a->dhdl > 0) || (b->dhdl > 0) )
443 "Trying to calculate the difference between derivatives instead of lambda points");
448 "Trying to calculate the difference lambdas with differing basis set");
450 for (i = 0; i < a->lc->N; i++)
452 double df = a->val[i] - b->val[i];
459 /* check whether two lambda vectors are the same */
460 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
470 for (i = 0; i < a->lc->N; i++)
472 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
481 /* they're derivatives, so we check whether the indices match */
482 return (a->dhdl == b->dhdl);
486 /* Compare the sort order of two foreign lambda vectors
488 returns 1 if a is 'bigger' than b,
489 returns 0 if they're the same,
490 returns -1 if a is 'smaller' than b.*/
491 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
492 const lambda_vec_t *b)
495 double norm_a = 0, norm_b = 0;
496 gmx_bool different = FALSE;
500 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
502 /* if either one has an index we sort based on that */
503 if ((a->index >= 0) || (b->index >= 0))
505 if (a->index == b->index)
509 return (a->index > b->index) ? 1 : -1;
511 if (a->dhdl >= 0 || b->dhdl >= 0)
513 /* lambda vectors that are derivatives always sort higher than those
514 without derivatives */
515 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
517 return (a->dhdl >= 0) ? 1 : -1;
519 return a->dhdl > b->dhdl;
522 /* neither has an index, so we can only sort on the lambda components,
523 which is only valid if there is one component */
524 for (i = 0; i < a->lc->N; i++)
526 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
530 norm_a += a->val[i]*a->val[i];
531 norm_b += b->val[i]*b->val[i];
537 return norm_a > norm_b;
540 /* Compare the sort order of two native lambda vectors
542 returns 1 if a is 'bigger' than b,
543 returns 0 if they're the same,
544 returns -1 if a is 'smaller' than b.*/
545 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
546 const lambda_vec_t *b)
552 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
554 /* if either one has an index we sort based on that */
555 if ((a->index >= 0) || (b->index >= 0))
557 if (a->index == b->index)
561 return (a->index > b->index) ? 1 : -1;
563 /* neither has an index, so we can only sort on the lambda components,
564 which is only valid if there is one component */
568 "Can't compare lambdas with no index and > 1 component");
570 if (a->dhdl >= 0 || b->dhdl >= 0)
573 "Can't compare native lambdas that are derivatives");
575 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
579 return a->val[0] > b->val[0] ? 1 : -1;
585 static void hist_init(hist_t *h, int nhist, int *nbin)
590 gmx_fatal(FARGS, "histogram with more than two sets of data!");
592 for (i = 0; i < nhist; i++)
594 snew(h->bin[i], nbin[i]);
596 h->nbin[i] = nbin[i];
597 h->start_time = h->delta_time = 0;
604 static void hist_destroy(hist_t *h)
610 static void xvg_init(xvg_t *ba)
619 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
620 lambda_vec_t *foreign_lambda, double temp,
621 gmx_bool derivative, const char *filename)
623 s->native_lambda = native_lambda;
624 s->foreign_lambda = foreign_lambda;
626 s->derivative = derivative;
631 s->start_time = s->delta_time = 0;
635 s->hist_alloc = NULL;
640 s->filename = filename;
643 static void sample_range_init(sample_range_t *r, samples_t *s)
651 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
652 lambda_vec_t *foreign_lambda, double temp)
654 sc->native_lambda = native_lambda;
655 sc->foreign_lambda = foreign_lambda;
661 sc->nsamples_alloc = 0;
664 sc->next = sc->prev = NULL;
667 static void sample_coll_destroy(sample_coll_t *sc)
669 /* don't free the samples themselves */
675 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
678 l->lambda = native_lambda;
684 l->sc = &(l->sc_head);
686 sample_coll_init(l->sc, native_lambda, NULL, 0.);
691 static void barres_init(barres_t *br)
700 br->dg_stddev_err = 0;
707 /* calculate the total number of samples in a sample collection */
708 static void sample_coll_calc_ntot(sample_coll_t *sc)
713 for (i = 0; i < sc->nsamples; i++)
719 sc->ntot += sc->s[i]->ntot;
723 sc->ntot += sc->r[i].end - sc->r[i].start;
730 /* find the barsamples_t associated with a lambda that corresponds to
731 a specific foreign lambda */
732 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
733 lambda_vec_t *foreign_lambda)
735 sample_coll_t *sc = l->sc->next;
739 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
749 /* insert li into an ordered list of lambda_colls */
750 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
752 sample_coll_t *scn = l->sc->next;
753 while ( (scn != l->sc) )
755 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
761 /* now insert it before the found scn */
763 sc->prev = scn->prev;
764 scn->prev->next = sc;
768 /* insert li into an ordered list of lambdas */
769 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
771 lambda_data_t *lc = head->next;
774 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
780 /* now insert ourselves before the found lc */
787 /* insert a sample and a sample_range into a sample_coll. The
788 samples are stored as a pointer, the range is copied. */
789 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
792 /* first check if it belongs here */
793 if (sc->temp != s->temp)
795 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
796 s->filename, sc->next->s[0]->filename);
798 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
800 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
801 s->filename, sc->next->s[0]->filename);
803 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
805 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
806 s->filename, sc->next->s[0]->filename);
809 /* check if there's room */
810 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
812 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
813 srenew(sc->s, sc->nsamples_alloc);
814 srenew(sc->r, sc->nsamples_alloc);
816 sc->s[sc->nsamples] = s;
817 sc->r[sc->nsamples] = *r;
820 sample_coll_calc_ntot(sc);
823 /* insert a sample into a lambda_list, creating the right sample_coll if
825 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
827 gmx_bool found = FALSE;
831 lambda_data_t *l = head->next;
833 /* first search for the right lambda_data_t */
836 if (lambda_vec_same(l->lambda, s->native_lambda) )
846 snew(l, 1); /* allocate a new one */
847 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
848 lambda_data_insert_lambda(head, l); /* add it to the list */
851 /* now look for a sample collection */
852 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
855 snew(sc, 1); /* allocate a new one */
856 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
857 lambda_data_insert_sample_coll(l, sc);
860 /* now insert the samples into the sample coll */
861 sample_range_init(&r, s);
862 sample_coll_insert_sample(sc, s, &r);
866 /* make a histogram out of a sample collection */
867 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
868 int *nbin_alloc, int *nbin,
869 double *dx, double *xmin, int nbin_default)
872 gmx_bool dx_set = FALSE;
873 gmx_bool xmin_set = FALSE;
875 gmx_bool xmax_set = FALSE;
876 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
877 limits of a histogram */
880 /* first determine dx and xmin; try the histograms */
881 for (i = 0; i < sc->nsamples; i++)
885 hist_t *hist = sc->s[i]->hist;
886 for (k = 0; k < hist->nhist; k++)
888 double hdx = hist->dx[k];
889 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
891 /* we use the biggest dx*/
892 if ( (!dx_set) || hist->dx[0] > *dx)
897 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
900 *xmin = (hist->x0[k]*hdx);
903 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
907 if (hist->bin[k][hist->nbin[k]-1] != 0)
909 xmax_set_hard = TRUE;
912 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
914 xmax_set_hard = TRUE;
920 /* and the delta us */
921 for (i = 0; i < sc->nsamples; i++)
923 if (sc->s[i]->ndu > 0)
925 /* determine min and max */
926 int starti = sc->r[i].start;
927 int endi = sc->r[i].end;
928 double du_xmin = sc->s[i]->du[starti];
929 double du_xmax = sc->s[i]->du[starti];
930 for (j = starti+1; j < endi; j++)
932 if (sc->s[i]->du[j] < du_xmin)
934 du_xmin = sc->s[i]->du[j];
936 if (sc->s[i]->du[j] > du_xmax)
938 du_xmax = sc->s[i]->du[j];
942 /* and now change the limits */
943 if ( (!xmin_set) || (du_xmin < *xmin) )
948 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
956 if (!xmax_set || !xmin_set)
965 *nbin = nbin_default;
966 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
967 be 0, and we count from 0 */
971 *nbin = (xmax-(*xmin))/(*dx);
974 if (*nbin > *nbin_alloc)
977 srenew(*bin, *nbin_alloc);
980 /* reset the histogram */
981 for (i = 0; i < (*nbin); i++)
986 /* now add the actual data */
987 for (i = 0; i < sc->nsamples; i++)
991 hist_t *hist = sc->s[i]->hist;
992 for (k = 0; k < hist->nhist; k++)
994 double hdx = hist->dx[k];
995 double xmin_hist = hist->x0[k]*hdx;
996 for (j = 0; j < hist->nbin[k]; j++)
998 /* calculate the bin corresponding to the middle of the
1000 double x = hdx*(j+0.5) + xmin_hist;
1001 int binnr = (int)((x-(*xmin))/(*dx));
1003 if (binnr >= *nbin || binnr < 0)
1008 (*bin)[binnr] += hist->bin[k][j];
1014 int starti = sc->r[i].start;
1015 int endi = sc->r[i].end;
1016 for (j = starti; j < endi; j++)
1018 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1019 if (binnr >= *nbin || binnr < 0)
1030 /* write a collection of histograms to a file */
1031 void sim_data_histogram(sim_data_t *sd, const char *filename,
1032 int nbin_default, const output_env_t oenv)
1034 char label_x[STRLEN];
1035 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1036 const char *title = "N(\\DeltaH)";
1037 const char *label_y = "Samples";
1041 char **setnames = NULL;
1042 gmx_bool first_set = FALSE;
1043 /* histogram data: */
1050 lambda_data_t *bl_head = sd->lb;
1052 printf("\nWriting histogram to %s\n", filename);
1053 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1055 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1057 /* first get all the set names */
1059 /* iterate over all lambdas */
1060 while (bl != bl_head)
1062 sample_coll_t *sc = bl->sc->next;
1064 /* iterate over all samples */
1065 while (sc != bl->sc)
1067 char buf[STRLEN], buf2[STRLEN];
1070 srenew(setnames, nsets);
1071 snew(setnames[nsets-1], STRLEN);
1072 if (sc->foreign_lambda->dhdl < 0)
1074 lambda_vec_print(sc->native_lambda, buf, FALSE);
1075 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1076 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1077 deltag, lambda, buf2, lambda, buf);
1081 lambda_vec_print(sc->native_lambda, buf, FALSE);
1082 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1090 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1093 /* now make the histograms */
1095 /* iterate over all lambdas */
1096 while (bl != bl_head)
1098 sample_coll_t *sc = bl->sc->next;
1100 /* iterate over all samples */
1101 while (sc != bl->sc)
1105 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1108 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1111 for (i = 0; i < nbin; i++)
1113 double xmin = i*dx + min;
1114 double xmax = (i+1)*dx + min;
1116 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1135 snprint_lambda_vec(char *str, int sz, const char *label, lambda_vec_t *lambda)
1139 n += snprintf(str+n, sz-n, "lambda vector [%s]: ", label);
1140 if (lambda->index >= 0)
1142 n += snprintf(str+n, sz-n, " init-lambda-state=%d", lambda->index);
1144 if (lambda->dhdl >= 0)
1146 n += snprintf(str+n, sz-n, " dhdl index=%d", lambda->dhdl);
1151 for (i = 0; i < lambda->lc->N; i++)
1153 n += snprintf(str+n, sz-n, " (%s) l=%g", lambda->lc->names[i], lambda->val[i]);
1159 /* create a collection (array) of barres_t object given a ordered linked list
1160 of barlamda_t sample collections */
1161 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1168 gmx_bool dhdl = FALSE;
1169 gmx_bool first = TRUE;
1170 lambda_data_t *bl_head = sd->lb;
1172 /* first count the lambdas */
1174 while (bl != bl_head)
1179 snew(res, nlambda-1);
1181 /* next put the right samples in the res */
1183 bl = bl_head->next->next; /* we start with the second one. */
1184 while (bl != bl_head)
1186 sample_coll_t *sc, *scprev;
1187 barres_t *br = &(res[*nres]);
1188 /* there is always a previous one. we search for that as a foreign
1190 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1191 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1199 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1200 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1204 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1209 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1212 else if (!scprev && !sc)
1214 char descX[STRLEN], descY[STRLEN];
1215 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1216 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1218 gmx_fatal(FARGS, "There is no path between the states X & Y below that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n\n%s\n%s\n", descX, descY);
1221 /* normal delta H */
1224 char descX[STRLEN], descY[STRLEN];
1225 snprint_lambda_vec(descX, STRLEN, "X", bl->lambda);
1226 snprint_lambda_vec(descY, STRLEN, "Y", bl->prev->lambda);
1227 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1231 char descX[STRLEN], descY[STRLEN];
1232 snprint_lambda_vec(descX, STRLEN, "X", bl->prev->lambda);
1233 snprint_lambda_vec(descY, STRLEN, "Y", bl->lambda);
1234 gmx_fatal(FARGS, "Could not find a set for foreign lambda (state X below)\nin the files for main lambda (state Y below)\n\n%s\n%s\n", descX, descY);
1246 /* estimate the maximum discretization error */
1247 static double barres_list_max_disc_err(barres_t *res, int nres)
1250 double disc_err = 0.;
1251 double delta_lambda;
1253 for (i = 0; i < nres; i++)
1255 barres_t *br = &(res[i]);
1257 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1258 br->a->native_lambda);
1260 for (j = 0; j < br->a->nsamples; j++)
1262 if (br->a->s[j]->hist)
1265 if (br->a->s[j]->derivative)
1267 Wfac = delta_lambda;
1270 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1273 for (j = 0; j < br->b->nsamples; j++)
1275 if (br->b->s[j]->hist)
1278 if (br->b->s[j]->derivative)
1280 Wfac = delta_lambda;
1282 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1290 /* impose start and end times on a sample collection, updating sample_ranges */
1291 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1295 for (i = 0; i < sc->nsamples; i++)
1297 samples_t *s = sc->s[i];
1298 sample_range_t *r = &(sc->r[i]);
1301 double end_time = s->hist->delta_time*s->hist->sum +
1302 s->hist->start_time;
1303 if (s->hist->start_time < begin_t || end_time > end_t)
1313 if (s->start_time < begin_t)
1315 r->start = (int)((begin_t - s->start_time)/s->delta_time);
1317 end_time = s->delta_time*s->ndu + s->start_time;
1318 if (end_time > end_t)
1320 r->end = (int)((end_t - s->start_time)/s->delta_time);
1326 for (j = 0; j < s->ndu; j++)
1328 if (s->t[j] < begin_t)
1333 if (s->t[j] >= end_t)
1340 if (r->start > r->end)
1346 sample_coll_calc_ntot(sc);
1349 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1351 double first_t, last_t;
1352 double begin_t, end_t;
1354 lambda_data_t *head = sd->lb;
1357 if (begin <= 0 && end < 0)
1362 /* first determine the global start and end times */
1368 sample_coll_t *sc = lc->sc->next;
1369 while (sc != lc->sc)
1371 for (j = 0; j < sc->nsamples; j++)
1373 double start_t, end_t;
1375 start_t = sc->s[j]->start_time;
1376 end_t = sc->s[j]->start_time;
1379 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1385 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1389 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1393 if (start_t < first_t || first_t < 0)
1407 /* calculate the actual times */
1425 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1427 if (begin_t > end_t)
1431 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1433 /* then impose them */
1437 sample_coll_t *sc = lc->sc->next;
1438 while (sc != lc->sc)
1440 sample_coll_impose_times(sc, begin_t, end_t);
1448 /* create subsample i out of ni from an existing sample_coll */
1449 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1450 sample_coll_t *sc_orig,
1454 int hist_start, hist_end;
1456 gmx_int64_t ntot_start;
1457 gmx_int64_t ntot_end;
1458 gmx_int64_t ntot_so_far;
1460 *sc = *sc_orig; /* just copy all fields */
1462 /* allocate proprietary memory */
1463 snew(sc->s, sc_orig->nsamples);
1464 snew(sc->r, sc_orig->nsamples);
1466 /* copy the samples */
1467 for (j = 0; j < sc_orig->nsamples; j++)
1469 sc->s[j] = sc_orig->s[j];
1470 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1473 /* now fix start and end fields */
1474 /* the casts avoid possible overflows */
1475 ntot_start = (gmx_int64_t)(sc_orig->ntot*(double)i/(double)ni);
1476 ntot_end = (gmx_int64_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1478 for (j = 0; j < sc->nsamples; j++)
1480 gmx_int64_t ntot_add;
1481 gmx_int64_t new_start, new_end;
1487 ntot_add = sc->s[j]->hist->sum;
1491 ntot_add = sc->r[j].end - sc->r[j].start;
1499 if (!sc->s[j]->hist)
1501 if (ntot_so_far < ntot_start)
1503 /* adjust starting point */
1504 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1508 new_start = sc->r[j].start;
1510 /* adjust end point */
1511 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1512 if (new_end > sc->r[j].end)
1514 new_end = sc->r[j].end;
1517 /* check if we're in range at all */
1518 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1523 /* and write the new range */
1524 sc->r[j].start = (int)new_start;
1525 sc->r[j].end = (int)new_end;
1532 double ntot_start_norm, ntot_end_norm;
1533 /* calculate the amount of overlap of the
1534 desired range (ntot_start -- ntot_end) onto
1535 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1537 /* first calculate normalized bounds
1538 (where 0 is the start of the hist range, and 1 the end) */
1539 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1540 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1542 /* now fix the boundaries */
1543 ntot_start_norm = min(1, max(0., ntot_start_norm));
1544 ntot_end_norm = max(0, min(1., ntot_end_norm));
1546 /* and calculate the overlap */
1547 overlap = ntot_end_norm - ntot_start_norm;
1549 if (overlap > 0.95) /* we allow for 5% slack */
1551 sc->r[j].use = TRUE;
1553 else if (overlap < 0.05)
1555 sc->r[j].use = FALSE;
1563 ntot_so_far += ntot_add;
1565 sample_coll_calc_ntot(sc);
1570 /* calculate minimum and maximum work values in sample collection */
1571 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1572 double *Wmin, double *Wmax)
1579 for (i = 0; i < sc->nsamples; i++)
1581 samples_t *s = sc->s[i];
1582 sample_range_t *r = &(sc->r[i]);
1587 for (j = r->start; j < r->end; j++)
1589 *Wmin = min(*Wmin, s->du[j]*Wfac);
1590 *Wmax = max(*Wmax, s->du[j]*Wfac);
1595 int hd = 0; /* determine the histogram direction: */
1597 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1601 dx = s->hist->dx[hd];
1603 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1605 *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1606 *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1607 /* look for the highest value bin with values */
1608 if (s->hist->bin[hd][j] > 0)
1610 *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1611 *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1620 /* Initialize a sim_data structure */
1621 static void sim_data_init(sim_data_t *sd)
1623 /* make linked list */
1624 sd->lb = &(sd->lb_head);
1625 sd->lb->next = sd->lb;
1626 sd->lb->prev = sd->lb;
1628 lambda_components_init(&(sd->lc));
1632 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1639 for (i = 0; i < n; i++)
1641 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1647 /* calculate the BAR average given a histogram
1649 if type== 0, calculate the best estimate for the average,
1650 if type==-1, calculate the minimum possible value given the histogram
1651 if type== 1, calculate the maximum possible value given the histogram */
1652 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1658 /* normalization factor multiplied with bin width and
1659 number of samples (we normalize through M): */
1661 int hd = 0; /* determine the histogram direction: */
1664 if ( (hist->nhist > 1) && (Wfac < 0) )
1669 max = hist->nbin[hd]-1;
1672 max = hist->nbin[hd]; /* we also add whatever was out of range */
1675 for (i = 0; i < max; i++)
1677 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1678 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1680 sum += pxdx/(1. + exp(x + sbMmDG));
1686 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1687 double temp, double tol, int type)
1692 double Wfac1, Wfac2, Wmin, Wmax;
1693 double DG0, DG1, DG2, dDG1;
1695 double n1, n2; /* numbers of samples as doubles */
1700 /* count the numbers of samples */
1706 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1707 if (ca->foreign_lambda->dhdl < 0)
1709 /* this is the case when the delta U were calculated directly
1710 (i.e. we're not scaling dhdl) */
1716 /* we're using dhdl, so delta_lambda needs to be a
1717 multiplication factor. */
1718 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1719 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1721 if (cb->native_lambda->lc->N > 1)
1724 "Can't (yet) do multi-component dhdl interpolation");
1727 Wfac1 = beta*delta_lambda;
1728 Wfac2 = -beta*delta_lambda;
1733 /* We print the output both in kT and kJ/mol.
1734 * Here we determine DG in kT, so when beta < 1
1735 * the precision has to be increased.
1740 /* Calculate minimum and maximum work to give an initial estimate of
1741 * delta G as their average.
1744 double Wmin1, Wmin2, Wmax1, Wmax2;
1745 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1746 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1748 Wmin = min(Wmin1, Wmin2);
1749 Wmax = max(Wmax1, Wmax2);
1757 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1759 /* We approximate by bisection: given our initial estimates
1760 we keep checking whether the halfway point is greater or
1761 smaller than what we get out of the BAR averages.
1763 For the comparison we can use twice the tolerance. */
1764 while (DG2 - DG0 > 2*tol)
1766 DG1 = 0.5*(DG0 + DG2);
1768 /* calculate the BAR averages */
1771 for (i = 0; i < ca->nsamples; i++)
1773 samples_t *s = ca->s[i];
1774 sample_range_t *r = &(ca->r[i]);
1779 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1783 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1788 for (i = 0; i < cb->nsamples; i++)
1790 samples_t *s = cb->s[i];
1791 sample_range_t *r = &(cb->r[i]);
1796 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1800 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1816 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1820 return 0.5*(DG0 + DG2);
1823 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1824 double temp, double dg, double *sa, double *sb)
1830 double Wfac1, Wfac2;
1836 /* count the numbers of samples */
1840 /* to ensure the work values are the same as during the delta_G */
1841 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1842 if (ca->foreign_lambda->dhdl < 0)
1844 /* this is the case when the delta U were calculated directly
1845 (i.e. we're not scaling dhdl) */
1851 /* we're using dhdl, so delta_lambda needs to be a
1852 multiplication factor. */
1853 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1855 Wfac1 = beta*delta_lambda;
1856 Wfac2 = -beta*delta_lambda;
1859 /* first calculate the average work in both directions */
1860 for (i = 0; i < ca->nsamples; i++)
1862 samples_t *s = ca->s[i];
1863 sample_range_t *r = &(ca->r[i]);
1868 for (j = r->start; j < r->end; j++)
1870 W_ab += Wfac1*s->du[j];
1875 /* normalization factor multiplied with bin width and
1876 number of samples (we normalize through M): */
1879 int hd = 0; /* histogram direction */
1880 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1884 dx = s->hist->dx[hd];
1886 for (j = 0; j < s->hist->nbin[0]; j++)
1888 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1889 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1897 for (i = 0; i < cb->nsamples; i++)
1899 samples_t *s = cb->s[i];
1900 sample_range_t *r = &(cb->r[i]);
1905 for (j = r->start; j < r->end; j++)
1907 W_ba += Wfac1*s->du[j];
1912 /* normalization factor multiplied with bin width and
1913 number of samples (we normalize through M): */
1916 int hd = 0; /* histogram direction */
1917 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1921 dx = s->hist->dx[hd];
1923 for (j = 0; j < s->hist->nbin[0]; j++)
1925 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1926 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1934 /* then calculate the relative entropies */
1939 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1940 double temp, double dg, double *stddev)
1944 double sigmafact = 0.;
1946 double Wfac1, Wfac2;
1952 /* count the numbers of samples */
1956 /* to ensure the work values are the same as during the delta_G */
1957 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1958 if (ca->foreign_lambda->dhdl < 0)
1960 /* this is the case when the delta U were calculated directly
1961 (i.e. we're not scaling dhdl) */
1967 /* we're using dhdl, so delta_lambda needs to be a
1968 multiplication factor. */
1969 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1971 Wfac1 = beta*delta_lambda;
1972 Wfac2 = -beta*delta_lambda;
1978 /* calculate average in both directions */
1979 for (i = 0; i < ca->nsamples; i++)
1981 samples_t *s = ca->s[i];
1982 sample_range_t *r = &(ca->r[i]);
1987 for (j = r->start; j < r->end; j++)
1989 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1994 /* normalization factor multiplied with bin width and
1995 number of samples (we normalize through M): */
1998 int hd = 0; /* histogram direction */
1999 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
2003 dx = s->hist->dx[hd];
2005 for (j = 0; j < s->hist->nbin[0]; j++)
2007 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2008 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2010 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
2015 for (i = 0; i < cb->nsamples; i++)
2017 samples_t *s = cb->s[i];
2018 sample_range_t *r = &(cb->r[i]);
2023 for (j = r->start; j < r->end; j++)
2025 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
2030 /* normalization factor multiplied with bin width and
2031 number of samples (we normalize through M): */
2034 int hd = 0; /* histogram direction */
2035 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2039 dx = s->hist->dx[hd];
2041 for (j = 0; j < s->hist->nbin[0]; j++)
2043 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2044 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2046 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2052 sigmafact /= (n1 + n2);
2056 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2057 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2062 static void calc_bar(barres_t *br, double tol,
2063 int npee_min, int npee_max, gmx_bool *bEE,
2067 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2068 for calculated quantities */
2069 int nsample1, nsample2;
2070 double temp = br->a->temp;
2072 double dg_min, dg_max;
2073 gmx_bool have_hist = FALSE;
2075 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2077 br->dg_disc_err = 0.;
2078 br->dg_histrange_err = 0.;
2080 /* check if there are histograms */
2081 for (i = 0; i < br->a->nsamples; i++)
2083 if (br->a->r[i].use && br->a->s[i]->hist)
2091 for (i = 0; i < br->b->nsamples; i++)
2093 if (br->b->r[i].use && br->b->s[i]->hist)
2101 /* calculate histogram-specific errors */
2104 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2105 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2107 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2109 /* the histogram range error is the biggest of the differences
2110 between the best estimate and the extremes */
2111 br->dg_histrange_err = fabs(dg_max - dg_min);
2113 br->dg_disc_err = 0.;
2114 for (i = 0; i < br->a->nsamples; i++)
2116 if (br->a->s[i]->hist)
2118 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2121 for (i = 0; i < br->b->nsamples; i++)
2123 if (br->b->s[i]->hist)
2125 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2129 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2131 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2140 sample_coll_t ca, cb;
2142 /* initialize the samples */
2143 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2145 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2148 for (npee = npee_min; npee <= npee_max; npee++)
2157 double dstddev2 = 0;
2160 for (p = 0; p < npee; p++)
2167 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2168 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2172 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2176 sample_coll_destroy(&ca);
2180 sample_coll_destroy(&cb);
2185 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2189 partsum[npee*(npee_max+1)+p] += dgp;
2191 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2196 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2199 dstddev2 += stddevc*stddevc;
2201 sample_coll_destroy(&ca);
2202 sample_coll_destroy(&cb);
2206 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2212 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2213 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2217 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2219 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
2220 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
2221 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
2222 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2227 static double bar_err(int nbmin, int nbmax, const double *partsum)
2230 double svar, s, s2, dg;
2233 for (nb = nbmin; nb <= nbmax; nb++)
2237 for (b = 0; b < nb; b++)
2239 dg = partsum[nb*(nbmax+1)+b];
2245 svar += (s2 - s*s)/(nb - 1);
2248 return sqrt(svar/(nbmax + 1 - nbmin));
2252 /* Seek the end of an identifier (consecutive non-spaces), followed by
2253 an optional number of spaces or '='-signs. Returns a pointer to the
2254 first non-space value found after that. Returns NULL if the string
2257 static const char *find_value(const char *str)
2259 gmx_bool name_end_found = FALSE;
2261 /* if the string is a NULL pointer, return a NULL pointer. */
2266 while (*str != '\0')
2268 /* first find the end of the name */
2269 if (!name_end_found)
2271 if (isspace(*str) || (*str == '=') )
2273 name_end_found = TRUE;
2278 if (!( isspace(*str) || (*str == '=') ))
2290 /* read a vector-notation description of a lambda vector */
2291 static gmx_bool read_lambda_compvec(const char *str,
2293 const lambda_components_t *lc_in,
2294 lambda_components_t *lc_out,
2298 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2299 components, or to check them */
2300 gmx_bool start_reached = FALSE; /* whether the start of component names
2302 gmx_bool vector = FALSE; /* whether there are multiple components */
2303 int n = 0; /* current component number */
2304 const char *val_start = NULL; /* start of the component name, or NULL
2305 if not in a value */
2315 if (lc_out && lc_out->N == 0)
2317 initialize_lc = TRUE;
2332 start_reached = TRUE;
2335 else if (*str == '(')
2338 start_reached = TRUE;
2340 else if (!isspace(*str))
2342 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2349 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2356 lambda_components_add(lc_out, val_start,
2361 if (!lambda_components_check(lc_out, n, val_start,
2370 /* add a vector component to lv */
2371 lv->val[n] = strtod(val_start, &strtod_end);
2372 if (val_start == strtod_end)
2375 "Error reading lambda vector in %s", fn);
2378 /* reset for the next identifier */
2387 else if (isalnum(*str))
2400 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2408 else if (lv == NULL)
2414 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2434 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2440 /* read and check the component names from a string */
2441 static gmx_bool read_lambda_components(const char *str,
2442 lambda_components_t *lc,
2446 return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2449 /* read an initialized lambda vector from a string */
2450 static gmx_bool read_lambda_vector(const char *str,
2455 return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2460 /* deduce lambda value from legend.
2462 legend = the legend string
2464 lam = the initialized lambda vector
2465 returns whether to use the data in this set.
2467 static gmx_bool legend2lambda(const char *fn,
2472 const char *ptr = NULL, *ptr2 = NULL;
2473 gmx_bool ok = FALSE;
2474 gmx_bool bdhdl = FALSE;
2475 const char *tostr = " to ";
2479 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2482 /* look for the last 'to': */
2486 ptr2 = strstr(ptr2, tostr);
2493 while (ptr2 != NULL && *ptr2 != '\0');
2497 ptr += strlen(tostr)-1; /* and advance past that 'to' */
2501 /* look for the = sign */
2502 ptr = strrchr(legend, '=');
2505 /* otherwise look for the last space */
2506 ptr = strrchr(legend, ' ');
2510 if (strstr(legend, "dH"))
2515 else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2520 else /*if (strstr(legend, "pV"))*/
2531 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2535 ptr = find_value(ptr);
2536 if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2538 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2547 ptr = strrchr(legend, '=');
2551 /* there must be a component name */
2555 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2557 /* now backtrack to the start of the identifier */
2558 while (isspace(*ptr))
2564 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2567 while (!isspace(*ptr))
2572 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2576 strncpy(buf, ptr, (end-ptr));
2577 buf[(end-ptr)] = '\0';
2578 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2582 strncpy(buf, ptr, (end-ptr));
2583 buf[(end-ptr)] = '\0';
2585 "Did not find lambda component for '%s' in %s",
2594 "dhdl without component name with >1 lambda component in %s",
2599 lam->dhdl = dhdl_index;
2604 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2605 lambda_components_t *lc)
2610 double native_lambda;
2614 /* first check for a state string */
2615 ptr = strstr(subtitle, "state");
2619 const char *val_end;
2621 /* the new 4.6 style lambda vectors */
2622 ptr = find_value(ptr);
2625 index = strtol(ptr, &end, 10);
2628 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2635 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2638 /* now find the lambda vector component names */
2639 while (*ptr != '(' && !isalnum(*ptr))
2645 "Incomplete lambda vector component data in %s", fn);
2650 if (!read_lambda_components(ptr, lc, &val_end, fn))
2653 "lambda vector components in %s don't match those previously read",
2656 ptr = find_value(val_end);
2659 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2662 lambda_vec_init(&(ba->native_lambda), lc);
2663 if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2665 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2667 ba->native_lambda.index = index;
2672 /* compatibility mode: check for lambda in other ways. */
2673 /* plain text lambda string */
2674 ptr = strstr(subtitle, "lambda");
2677 /* xmgrace formatted lambda string */
2678 ptr = strstr(subtitle, "\\xl\\f{}");
2682 /* xmgr formatted lambda string */
2683 ptr = strstr(subtitle, "\\8l\\4");
2687 ptr = strstr(ptr, "=");
2691 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2692 /* add the lambda component name as an empty string */
2695 if (!lambda_components_check(lc, 0, "", 0))
2698 "lambda vector components in %s don't match those previously read",
2704 lambda_components_add(lc, "", 0);
2706 lambda_vec_init(&(ba->native_lambda), lc);
2707 ba->native_lambda.val[0] = native_lambda;
2714 static void filename2lambda(const char *fn)
2717 const char *ptr, *digitptr;
2721 /* go to the end of the path string and search backward to find the last
2722 directory in the path which has to contain the value of lambda
2724 while (ptr[1] != '\0')
2728 /* searching backward to find the second directory separator */
2733 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2741 /* save the last position of a digit between the last two
2742 separators = in the last dirname */
2743 if (dirsep > 0 && isdigit(*ptr))
2751 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2752 " last directory in the path '%s' does not contain a number", fn);
2754 if (digitptr[-1] == '-')
2758 lambda = strtod(digitptr, &endptr);
2759 if (endptr == digitptr)
2761 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2765 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2766 lambda_components_t *lc)
2769 char *subtitle, **legend, *ptr;
2771 gmx_bool native_lambda_read = FALSE;
2779 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2782 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2784 /* Reorder the data */
2786 for (i = 1; i < ba->nset; i++)
2788 ba->y[i-1] = ba->y[i];
2792 snew(ba->np, ba->nset);
2793 for (i = 0; i < ba->nset; i++)
2799 if (subtitle != NULL)
2801 /* try to extract temperature */
2802 ptr = strstr(subtitle, "T =");
2806 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2810 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2820 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]gmx bar[tt]", fn);
2825 /* Try to deduce lambda from the subtitle */
2828 if (subtitle2lambda(subtitle, ba, fn, lc))
2830 native_lambda_read = TRUE;
2833 snew(ba->lambda, ba->nset);
2836 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2839 if (!native_lambda_read)
2841 /* Deduce lambda from the file name */
2842 filename2lambda(fn);
2843 native_lambda_read = TRUE;
2845 ba->lambda[0] = ba->native_lambda;
2849 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2854 for (i = 0; i < ba->nset; )
2856 gmx_bool use = FALSE;
2857 /* Read lambda from the legend */
2858 lambda_vec_init( &(ba->lambda[i]), lc );
2859 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2860 use = legend2lambda(fn, legend[i], &(ba->lambda[i]));
2863 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2869 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2870 for (j = i+1; j < ba->nset; j++)
2872 ba->y[j-1] = ba->y[j];
2873 legend[j-1] = legend[j];
2880 if (!native_lambda_read)
2882 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2887 for (i = 0; i < ba->nset-1; i++)
2895 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2904 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2906 if (barsim->nset < 1)
2908 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2911 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2913 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2915 *temp = barsim->temp;
2917 /* now create a series of samples_t */
2918 snew(s, barsim->nset);
2919 for (i = 0; i < barsim->nset; i++)
2921 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2922 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2923 &(barsim->lambda[i])),
2925 s[i].du = barsim->y[i];
2926 s[i].ndu = barsim->np[i];
2929 lambda_data_list_insert_sample(sd->lb, s+i);
2934 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2935 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2936 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2937 for (i = 0; i < barsim->nset; i++)
2939 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2940 printf(" %s (%d pts)\n", buf, s[i].ndu);
2946 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2947 double start_time, double delta_time,
2948 lambda_vec_t *native_lambda, double temp,
2949 double *last_t, const char *filename)
2953 double old_foreign_lambda;
2954 lambda_vec_t *foreign_lambda;
2956 samples_t *s; /* convenience pointer */
2959 /* check the block types etc. */
2960 if ( (blk->nsub < 3) ||
2961 (blk->sub[0].type != xdr_datatype_int) ||
2962 (blk->sub[1].type != xdr_datatype_double) ||
2964 (blk->sub[2].type != xdr_datatype_float) &&
2965 (blk->sub[2].type != xdr_datatype_double)
2967 (blk->sub[0].nr < 1) ||
2968 (blk->sub[1].nr < 1) )
2971 "Unexpected/corrupted block data in file %s around time %f.",
2972 filename, start_time);
2975 snew(foreign_lambda, 1);
2976 lambda_vec_init(foreign_lambda, native_lambda->lc);
2977 lambda_vec_copy(foreign_lambda, native_lambda);
2978 type = blk->sub[0].ival[0];
2981 for (i = 0; i < native_lambda->lc->N; i++)
2983 foreign_lambda->val[i] = blk->sub[1].dval[i];
2988 if (blk->sub[0].nr > 1)
2990 foreign_lambda->dhdl = blk->sub[0].ival[1];
2994 foreign_lambda->dhdl = 0;
3000 /* initialize the samples structure if it's empty. */
3002 samples_init(*smp, native_lambda, foreign_lambda, temp,
3003 type == dhbtDHDL, filename);
3004 (*smp)->start_time = start_time;
3005 (*smp)->delta_time = delta_time;
3008 /* set convenience pointer */
3011 /* now double check */
3012 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
3014 char buf[STRLEN], buf2[STRLEN];
3015 lambda_vec_print(foreign_lambda, buf, FALSE);
3016 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
3017 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
3018 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
3019 filename, start_time);
3022 /* make room for the data */
3023 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
3025 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
3026 blk->sub[2].nr*2 : s->ndu_alloc;
3027 srenew(s->du_alloc, s->ndu_alloc);
3028 s->du = s->du_alloc;
3031 s->ndu += blk->sub[2].nr;
3032 s->ntot += blk->sub[2].nr;
3033 *ndu = blk->sub[2].nr;
3035 /* and copy the data*/
3036 for (j = 0; j < blk->sub[2].nr; j++)
3038 if (blk->sub[2].type == xdr_datatype_float)
3040 s->du[startj+j] = blk->sub[2].fval[j];
3044 s->du[startj+j] = blk->sub[2].dval[j];
3047 if (start_time + blk->sub[2].nr*delta_time > *last_t)
3049 *last_t = start_time + blk->sub[2].nr*delta_time;
3053 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3054 double start_time, double delta_time,
3055 lambda_vec_t *native_lambda, double temp,
3056 double *last_t, const char *filename)
3061 double old_foreign_lambda;
3062 lambda_vec_t *foreign_lambda;
3066 /* check the block types etc. */
3067 if ( (blk->nsub < 2) ||
3068 (blk->sub[0].type != xdr_datatype_double) ||
3069 (blk->sub[1].type != xdr_datatype_int64) ||
3070 (blk->sub[0].nr < 2) ||
3071 (blk->sub[1].nr < 2) )
3074 "Unexpected/corrupted block data in file %s around time %f",
3075 filename, start_time);
3078 nhist = blk->nsub-2;
3086 "Unexpected/corrupted block data in file %s around time %f",
3087 filename, start_time);
3093 snew(foreign_lambda, 1);
3094 lambda_vec_init(foreign_lambda, native_lambda->lc);
3095 lambda_vec_copy(foreign_lambda, native_lambda);
3096 type = (int)(blk->sub[1].lval[1]);
3099 double old_foreign_lambda;
3101 old_foreign_lambda = blk->sub[0].dval[0];
3102 if (old_foreign_lambda >= 0)
3104 foreign_lambda->val[0] = old_foreign_lambda;
3105 if (foreign_lambda->lc->N > 1)
3108 "Single-component lambda in multi-component file %s",
3114 for (i = 0; i < native_lambda->lc->N; i++)
3116 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3122 if (foreign_lambda->lc->N > 1)
3124 if (blk->sub[1].nr < 3 + nhist)
3127 "Missing derivative coord in multi-component file %s",
3130 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3134 foreign_lambda->dhdl = 0;
3138 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3142 for (i = 0; i < nhist; i++)
3144 nbins[i] = blk->sub[i+2].nr;
3147 hist_init(s->hist, nhist, nbins);
3149 for (i = 0; i < nhist; i++)
3151 s->hist->x0[i] = blk->sub[1].lval[2+i];
3152 s->hist->dx[i] = blk->sub[0].dval[1];
3155 s->hist->dx[i] = -s->hist->dx[i];
3159 s->hist->start_time = start_time;
3160 s->hist->delta_time = delta_time;
3161 s->start_time = start_time;
3162 s->delta_time = delta_time;
3164 for (i = 0; i < nhist; i++)
3167 gmx_int64_t sum = 0;
3169 for (j = 0; j < s->hist->nbin[i]; j++)
3171 int binv = (int)(blk->sub[i+2].ival[j]);
3173 s->hist->bin[i][j] = binv;
3186 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3192 if (start_time + s->hist->sum*delta_time > *last_t)
3194 *last_t = start_time + s->hist->sum*delta_time;
3200 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3206 gmx_enxnm_t *enm = NULL;
3207 double first_t = -1;
3209 samples_t **samples_rawdh = NULL; /* contains samples for raw delta_h */
3210 int *nhists = NULL; /* array to keep count & print at end */
3211 int *npts = NULL; /* array to keep count & print at end */
3212 lambda_vec_t **lambdas = NULL; /* array to keep count & print at end */
3213 lambda_vec_t *native_lambda;
3214 double end_time; /* the end time of the last batch of samples */
3216 lambda_vec_t start_lambda;
3218 fp = open_enx(fn, "r");
3219 do_enxnms(fp, &nre, &enm);
3222 snew(native_lambda, 1);
3223 start_lambda.lc = NULL;
3225 while (do_enx(fp, fr))
3227 /* count the data blocks */
3228 int nblocks_raw = 0;
3229 int nblocks_hist = 0;
3232 /* DHCOLL block information: */
3233 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3236 /* count the blocks and handle collection information: */
3237 for (i = 0; i < fr->nblock; i++)
3239 if (fr->block[i].id == enxDHHIST)
3243 if (fr->block[i].id == enxDH)
3247 if (fr->block[i].id == enxDHCOLL)
3250 if ( (fr->block[i].nsub < 1) ||
3251 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3252 (fr->block[i].sub[0].nr < 5))
3254 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3257 /* read the data from the DHCOLL block */
3258 rtemp = fr->block[i].sub[0].dval[0];
3259 start_time = fr->block[i].sub[0].dval[1];
3260 delta_time = fr->block[i].sub[0].dval[2];
3261 old_start_lambda = fr->block[i].sub[0].dval[3];
3262 delta_lambda = fr->block[i].sub[0].dval[4];
3264 if (delta_lambda > 0)
3266 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3268 if ( ( *temp != rtemp) && (*temp > 0) )
3270 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3274 if (old_start_lambda >= 0)
3278 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3281 "lambda vector components in %s don't match those previously read",
3287 lambda_components_add(&(sd->lc), "", 0);
3289 if (!start_lambda.lc)
3291 lambda_vec_init(&start_lambda, &(sd->lc));
3293 start_lambda.val[0] = old_start_lambda;
3297 /* read lambda vector */
3299 gmx_bool check = (sd->lc.N > 0);
3300 if (fr->block[i].nsub < 2)
3303 "No lambda vector, but start_lambda=%f\n",
3306 n_lambda_vec = fr->block[i].sub[1].ival[1];
3307 for (j = 0; j < n_lambda_vec; j++)
3310 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3313 /* check the components */
3314 lambda_components_check(&(sd->lc), j, name,
3319 lambda_components_add(&(sd->lc), name,
3323 lambda_vec_init(&start_lambda, &(sd->lc));
3324 start_lambda.index = fr->block[i].sub[1].ival[0];
3325 for (j = 0; j < n_lambda_vec; j++)
3327 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3332 first_t = start_time;
3339 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3341 if (nblocks_raw > 0 && nblocks_hist > 0)
3343 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3348 /* check the native lambda */
3349 if (!lambda_vec_same(&start_lambda, native_lambda) )
3351 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3352 fn, native_lambda, start_lambda, start_time);
3354 /* check the number of samples against the previous number */
3355 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3357 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3358 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3360 /* check whether last iterations's end time matches with
3361 the currrent start time */
3362 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3364 /* it didn't. We need to store our samples and reallocate */
3365 for (i = 0; i < nsamples; i++)
3367 if (samples_rawdh[i])
3369 /* insert it into the existing list */
3370 lambda_data_list_insert_sample(sd->lb,
3372 /* and make sure we'll allocate a new one this time
3374 samples_rawdh[i] = NULL;
3381 /* this is the first round; allocate the associated data
3383 /*native_lambda=start_lambda;*/
3384 lambda_vec_init(native_lambda, &(sd->lc));
3385 lambda_vec_copy(native_lambda, &start_lambda);
3386 nsamples = nblocks_raw+nblocks_hist;
3387 snew(nhists, nsamples);
3388 snew(npts, nsamples);
3389 snew(lambdas, nsamples);
3390 snew(samples_rawdh, nsamples);
3391 for (i = 0; i < nsamples; i++)
3396 samples_rawdh[i] = NULL; /* init to NULL so we know which
3397 ones contain values */
3402 k = 0; /* counter for the lambdas, etc. arrays */
3403 for (i = 0; i < fr->nblock; i++)
3405 if (fr->block[i].id == enxDH)
3407 int type = (fr->block[i].sub[0].ival[0]);
3408 if (type == dhbtDH || type == dhbtDHDL)
3411 read_edr_rawdh_block(&(samples_rawdh[k]),
3414 start_time, delta_time,
3415 native_lambda, rtemp,
3418 if (samples_rawdh[k])
3420 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3425 else if (fr->block[i].id == enxDHHIST)
3427 int type = (int)(fr->block[i].sub[1].lval[1]);
3428 if (type == dhbtDH || type == dhbtDHDL)
3432 samples_t *s; /* this is where the data will go */
3433 s = read_edr_hist_block(&nb, &(fr->block[i]),
3434 start_time, delta_time,
3435 native_lambda, rtemp,
3440 lambdas[k] = s->foreign_lambda;
3443 /* and insert the new sample immediately */
3444 for (j = 0; j < nb; j++)
3446 lambda_data_list_insert_sample(sd->lb, s+j);
3452 /* Now store all our extant sample collections */
3453 for (i = 0; i < nsamples; i++)
3455 if (samples_rawdh[i])
3457 /* insert it into the existing list */
3458 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3466 lambda_vec_print(native_lambda, buf, FALSE);
3467 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3468 fn, first_t, last_t, buf);
3469 for (i = 0; i < nsamples; i++)
3473 lambda_vec_print(lambdas[i], buf, TRUE);
3476 printf(" %s (%d hists)\n", buf, nhists[i]);
3480 printf(" %s (%d pts)\n", buf, npts[i]);
3492 int gmx_bar(int argc, char *argv[])
3494 static const char *desc[] = {
3495 "[THISMODULE] calculates free energy difference estimates through ",
3496 "Bennett's acceptance ratio method (BAR). It also automatically",
3497 "adds series of individual free energies obtained with BAR into",
3498 "a combined free energy estimate.[PAR]",
3500 "Every individual BAR free energy difference relies on two ",
3501 "simulations at different states: say state A and state B, as",
3502 "controlled by a parameter, [GRK]lambda[grk] (see the [REF].mdp[ref] parameter",
3503 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3504 "average of the Hamiltonian difference of state B given state A and",
3506 "The energy differences to the other state must be calculated",
3507 "explicitly during the simulation. This can be done with",
3508 "the [REF].mdp[ref] option [TT]foreign_lambda[tt].[PAR]",
3510 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3511 "Two types of input files are supported:",
3513 " * Files with more than one [IT]y[it]-value. ",
3514 " The files should have columns ",
3515 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3516 " The [GRK]lambda[grk] values are inferred ",
3517 " from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3518 " dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3519 " legends of Delta H",
3520 " * Files with only one [IT]y[it]-value. Using the",
3521 " [TT]-extp[tt] option for these files, it is assumed",
3522 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3523 " Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3524 " The [GRK]lambda[grk] value of the simulation is inferred from the ",
3525 " subtitle (if present), otherwise from a number in the subdirectory ",
3526 " in the file name.",
3529 "The [GRK]lambda[grk] of the simulation is parsed from ",
3530 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3531 "foreign [GRK]lambda[grk] values from the legend containing the ",
3532 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3533 "the legend line containing 'T ='.[PAR]",
3535 "The input option [TT]-g[tt] expects multiple [REF].edr[ref] files. ",
3536 "These can contain either lists of energy differences (see the ",
3537 "[REF].mdp[ref] option [TT]separate_dhdl_file[tt]), or a series of ",
3538 "histograms (see the [REF].mdp[ref] options [TT]dh_hist_size[tt] and ",
3539 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3540 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3542 "In addition to the [REF].mdp[ref] option [TT]foreign_lambda[tt], ",
3543 "the energy difference can also be extrapolated from the ",
3544 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3545 "option, which assumes that the system's Hamiltonian depends linearly",
3546 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3548 "The free energy estimates are determined using BAR with bisection, ",
3549 "with the precision of the output set with [TT]-prec[tt]. ",
3550 "An error estimate taking into account time correlations ",
3551 "is made by splitting the data into blocks and determining ",
3552 "the free energy differences over those blocks and assuming ",
3553 "the blocks are independent. ",
3554 "The final error estimate is determined from the average variance ",
3555 "over 5 blocks. A range of block numbers for error estimation can ",
3556 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3558 "[THISMODULE] tries to aggregate samples with the same 'native' and ",
3559 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3560 "samples. [BB]Note[bb] that when aggregating energy ",
3561 "differences/derivatives with different sampling intervals, this is ",
3562 "almost certainly not correct. Usually subsequent energies are ",
3563 "correlated and different time intervals mean different degrees ",
3564 "of correlation between samples.[PAR]",
3566 "The results are split in two parts: the last part contains the final ",
3567 "results in kJ/mol, together with the error estimate for each part ",
3568 "and the total. The first part contains detailed free energy ",
3569 "difference estimates and phase space overlap measures in units of ",
3570 "kT (together with their computed error estimate). The printed ",
3573 " * lam_A: the [GRK]lambda[grk] values for point A.",
3574 " * lam_B: the [GRK]lambda[grk] values for point B.",
3575 " * DG: the free energy estimate.",
3576 " * s_A: an estimate of the relative entropy of B in A.",
3577 " * s_B: an estimate of the relative entropy of A in B.",
3578 " * stdev: an estimate expected per-sample standard deviation.",
3581 "The relative entropy of both states in each other's ensemble can be ",
3582 "interpreted as a measure of phase space overlap: ",
3583 "the relative entropy s_A of the work samples of lambda_B in the ",
3584 "ensemble of lambda_A (and vice versa for s_B), is a ",
3585 "measure of the 'distance' between Boltzmann distributions of ",
3586 "the two states, that goes to zero for identical distributions. See ",
3587 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3589 "The estimate of the expected per-sample standard deviation, as given ",
3590 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3591 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3592 "of the actual statistical error, because it assumes independent samples).[PAR]",
3594 "To get a visual estimate of the phase space overlap, use the ",
3595 "[TT]-oh[tt] option to write series of histograms, together with the ",
3596 "[TT]-nbin[tt] option.[PAR]"
3598 static real begin = 0, end = -1, temp = -1;
3599 int nd = 2, nbmin = 5, nbmax = 5;
3601 gmx_bool use_dhdl = FALSE;
3602 gmx_bool calc_s, calc_v;
3604 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3605 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3606 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3607 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3608 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3609 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3610 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3611 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3615 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3616 { efEDR, "-g", "ener", ffOPTRDMULT },
3617 { efXVG, "-o", "bar", ffOPTWR },
3618 { efXVG, "-oi", "barint", ffOPTWR },
3619 { efXVG, "-oh", "histogram", ffOPTWR }
3621 #define NFILE asize(fnm)
3624 int nf = 0; /* file counter */
3626 int nfile_tot; /* total number of input files */
3631 sim_data_t sim_data; /* the simulation data */
3632 barres_t *results; /* the results */
3633 int nresults; /* number of results in results array */
3636 double prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3638 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3639 char buf[STRLEN], buf2[STRLEN];
3640 char ktformat[STRLEN], sktformat[STRLEN];
3641 char kteformat[STRLEN], skteformat[STRLEN];
3644 gmx_bool result_OK = TRUE, bEE = TRUE;
3646 gmx_bool disc_err = FALSE;
3647 double sum_disc_err = 0.; /* discretization error */
3648 gmx_bool histrange_err = FALSE;
3649 double sum_histrange_err = 0.; /* histogram range error */
3650 double stat_err = 0.; /* statistical error */
3652 if (!parse_common_args(&argc, argv,
3654 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv))
3659 if (opt2bSet("-f", NFILE, fnm))
3661 nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3663 if (opt2bSet("-g", NFILE, fnm))
3665 nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3668 sim_data_init(&sim_data);
3670 /* make linked list */
3672 lambda_data_init(lb, 0, 0);
3678 nfile_tot = nxvgfile + nedrfile;
3682 gmx_fatal(FARGS, "No input files!");
3687 gmx_fatal(FARGS, "Can not have negative number of digits");
3689 prec = pow(10, -nd);
3691 snew(partsum, (nbmax+1)*(nbmax+1));
3694 /* read in all files. First xvg files */
3695 for (f = 0; f < nxvgfile; f++)
3697 read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3700 /* then .edr files */
3701 for (f = 0; f < nedrfile; f++)
3703 read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3707 /* fix the times to allow for equilibration */
3708 sim_data_impose_times(&sim_data, begin, end);
3710 if (opt2bSet("-oh", NFILE, fnm))
3712 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3715 /* assemble the output structures from the lambdas */
3716 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3718 sum_disc_err = barres_list_max_disc_err(results, nresults);
3722 printf("\nNo results to calculate.\n");
3726 if (sum_disc_err > prec)
3728 prec = sum_disc_err;
3729 nd = ceil(-log10(prec));
3730 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3734 /*sprintf(lamformat,"%%6.3f");*/
3735 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3736 /* the format strings of the results in kT */
3737 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3738 sprintf( sktformat, "%%%ds", 6+nd);
3739 /* the format strings of the errors in kT */
3740 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3741 sprintf( skteformat, "%%%ds", 4+nd);
3742 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3743 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3748 if (opt2bSet("-o", NFILE, fnm))
3750 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3751 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3752 "\\lambda", buf, exvggtXYDY, oenv);
3756 if (opt2bSet("-oi", NFILE, fnm))
3758 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3759 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3760 "\\lambda", buf, oenv);
3770 /* first calculate results */
3773 for (f = 0; f < nresults; f++)
3775 /* Determine the free energy difference with a factor of 10
3776 * more accuracy than requested for printing.
3778 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3781 if (results[f].dg_disc_err > prec/10.)
3785 if (results[f].dg_histrange_err > prec/10.)
3787 histrange_err = TRUE;
3791 /* print results in kT */
3795 printf("\nTemperature: %g K\n", temp);
3797 printf("\nDetailed results in kT (see help for explanation):\n\n");
3798 printf("%6s ", " lam_A");
3799 printf("%6s ", " lam_B");
3800 printf(sktformat, "DG ");
3803 printf(skteformat, "+/- ");
3807 printf(skteformat, "disc ");
3811 printf(skteformat, "range ");
3813 printf(sktformat, "s_A ");
3816 printf(skteformat, "+/- " );
3818 printf(sktformat, "s_B ");
3821 printf(skteformat, "+/- " );
3823 printf(sktformat, "stdev ");
3826 printf(skteformat, "+/- ");
3829 for (f = 0; f < nresults; f++)
3831 lambda_vec_print_short(results[f].a->native_lambda, buf);
3833 lambda_vec_print_short(results[f].b->native_lambda, buf);
3835 printf(ktformat, results[f].dg);
3839 printf(kteformat, results[f].dg_err);
3844 printf(kteformat, results[f].dg_disc_err);
3849 printf(kteformat, results[f].dg_histrange_err);
3852 printf(ktformat, results[f].sa);
3856 printf(kteformat, results[f].sa_err);
3859 printf(ktformat, results[f].sb);
3863 printf(kteformat, results[f].sb_err);
3866 printf(ktformat, results[f].dg_stddev);
3870 printf(kteformat, results[f].dg_stddev_err);
3874 /* Check for negative relative entropy with a 95% certainty. */
3875 if (results[f].sa < -2*results[f].sa_err ||
3876 results[f].sb < -2*results[f].sb_err)
3884 printf("\nWARNING: Some of these results violate the Second Law of "
3885 "Thermodynamics: \n"
3886 " This is can be the result of severe undersampling, or "
3888 " there is something wrong with the simulations.\n");
3892 /* final results in kJ/mol */
3893 printf("\n\nFinal results in kJ/mol:\n\n");
3895 for (f = 0; f < nresults; f++)
3900 lambda_vec_print_short(results[f].a->native_lambda, buf);
3901 fprintf(fpi, xvg2format, buf, dg_tot);
3907 lambda_vec_print_intermediate(results[f].a->native_lambda,
3908 results[f].b->native_lambda,
3911 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3915 lambda_vec_print_short(results[f].a->native_lambda, buf);
3916 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3917 printf("%s - %s", buf, buf2);
3920 printf(dgformat, results[f].dg*kT);
3924 printf(dgformat, results[f].dg_err*kT);
3928 printf(" (max. range err. = ");
3929 printf(dgformat, results[f].dg_histrange_err*kT);
3931 sum_histrange_err += results[f].dg_histrange_err*kT;
3935 dg_tot += results[f].dg;
3939 lambda_vec_print_short(results[0].a->native_lambda, buf);
3940 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3941 printf("%s - %s", buf, buf2);
3944 printf(dgformat, dg_tot*kT);
3947 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3949 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3954 printf("\nmaximum discretization error = ");
3955 printf(dgformat, sum_disc_err);
3956 if (bEE && stat_err < sum_disc_err)
3958 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3963 printf("\nmaximum histogram range error = ");
3964 printf(dgformat, sum_histrange_err);
3965 if (bEE && stat_err < sum_histrange_err)
3967 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3976 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3977 fprintf(fpi, xvg2format, buf, dg_tot);
3985 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3986 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");