1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
54 #include "gmx_fatal.h"
63 /* Structure for the names of lambda vector components */
64 typedef struct lambda_components_t
66 char **names; /* Array of strings with names for the lambda vector
68 int N; /* The number of components */
69 int Nalloc; /* The number of allocated components */
70 } lambda_components_t;
72 /* Structure for a lambda vector or a dhdl derivative direction */
73 typedef struct lambda_vec_t
75 double *val; /* The lambda vector component values. Only valid if
77 int dhdl; /* The coordinate index for the derivative described by this
79 const lambda_components_t *lc; /* the associated lambda_components
81 int index; /* The state number (init-lambda-state) of this lambda
82 vector, if known. If not, it is set to -1 */
85 /* the dhdl.xvg data from a simulation */
89 int ftp; /* file type */
90 int nset; /* number of lambdas, including dhdl */
91 int *np; /* number of data points (du or hists) per lambda */
92 int np_alloc; /* number of points (du or hists) allocated */
93 double temp; /* temperature */
94 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
95 double *t; /* the times (of second index for y) */
96 double **y; /* the dU values. y[0] holds the derivative, while
97 further ones contain the energy differences between
98 the native lambda and the 'foreign' lambdas. */
99 lambda_vec_t native_lambda; /* the native lambda */
101 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
105 typedef struct hist_t
107 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
108 double dx[2]; /* the histogram spacing. The reverse
109 dx is the negative of the forward dx.*/
110 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
113 int nbin[2]; /* the (forward+reverse) number of bins */
114 gmx_large_int_t sum; /* the total number of counts. Must be
115 the same for forward + reverse. */
116 int nhist; /* number of hist datas (forward or reverse) */
118 double start_time, delta_time; /* start time, end time of histogram */
122 /* an aggregate of samples for partial free energy calculation */
123 typedef struct samples_t
125 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
126 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
127 double temp; /* the temperature */
128 gmx_bool derivative; /* whether this sample is a derivative */
130 /* The samples come either as either delta U lists: */
131 int ndu; /* the number of delta U samples */
132 double *du; /* the delta u's */
133 double *t; /* the times associated with those samples, or: */
134 double start_time, delta_time; /*start time and delta time for linear time*/
136 /* or as histograms: */
137 hist_t *hist; /* a histogram */
139 /* allocation data: (not NULL for data 'owned' by this struct) */
140 double *du_alloc, *t_alloc; /* allocated delta u arrays */
141 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
142 hist_t *hist_alloc; /* allocated hist */
144 gmx_large_int_t ntot; /* total number of samples */
145 const char *filename; /* the file name this sample comes from */
148 /* a sample range (start to end for du-style data, or boolean
149 for both du-style data and histograms */
150 typedef struct sample_range_t
152 int start, end; /* start and end index for du style data */
153 gmx_bool use; /* whether to use this sample */
155 samples_t *s; /* the samples this range belongs to */
159 /* a collection of samples for a partial free energy calculation
160 (i.e. the collection of samples from one native lambda to one
162 typedef struct sample_coll_t
164 lambda_vec_t *native_lambda; /* these should be the same for all samples
166 lambda_vec_t *foreign_lambda; /* collection */
167 double temp; /* the temperature */
169 int nsamples; /* the number of samples */
170 samples_t **s; /* the samples themselves */
171 sample_range_t *r; /* the sample ranges */
172 int nsamples_alloc; /* number of allocated samples */
174 gmx_large_int_t ntot; /* total number of samples in the ranges of
177 struct sample_coll_t *next, *prev; /* next and previous in the list */
180 /* all the samples associated with a lambda point */
181 typedef struct lambda_data_t
183 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
184 double temp; /* temperature */
186 sample_coll_t *sc; /* the samples */
188 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
190 struct lambda_data_t *next, *prev; /* the next and prev in the list */
193 /* Top-level data structure of simulation data */
194 typedef struct sim_data_t
196 lambda_data_t *lb; /* a lambda data linked list */
197 lambda_data_t lb_head; /* The head element of the linked list */
199 lambda_components_t lc; /* the allowed components of the lambda
203 /* Top-level data structure with calculated values. */
205 sample_coll_t *a, *b; /* the simulation data */
207 double dg; /* the free energy difference */
208 double dg_err; /* the free energy difference */
210 double dg_disc_err; /* discretization error */
211 double dg_histrange_err; /* histogram range error */
213 double sa; /* relative entropy of b in state a */
214 double sa_err; /* error in sa */
215 double sb; /* relative entropy of a in state b */
216 double sb_err; /* error in sb */
218 double dg_stddev; /* expected dg stddev per sample */
219 double dg_stddev_err; /* error in dg_stddev */
223 /* Initialize a lambda_components structure */
224 static void lambda_components_init(lambda_components_t *lc)
228 snew(lc->names, lc->Nalloc);
231 /* Add a component to a lambda_components structure */
232 static void lambda_components_add(lambda_components_t *lc,
233 const char *name, size_t name_length)
235 while (lc->N + 1 > lc->Nalloc)
237 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
238 srealloc( lc->names, lc->Nalloc );
240 snew(lc->names[lc->N], name_length+1);
241 strncpy(lc->names[lc->N], name, name_length);
245 /* check whether a component with index 'index' matches the given name, or
246 is also NULL. Returns TRUE if this is the case.
247 the string name does not need to end */
248 static gmx_bool lambda_components_check(const lambda_components_t *lc,
258 if (name == NULL && lc->names[index] == NULL)
262 if ((name == NULL) != (lc->names[index] == NULL))
266 len = strlen(lc->names[index]);
267 if (len != name_length)
271 if (strncmp(lc->names[index], name, name_length) == 0)
278 /* Find the index of a given lambda component name, or -1 if not found */
279 static int lambda_components_find(const lambda_components_t *lc,
285 for (i = 0; i < lc->N; i++)
287 if (strncmp(lc->names[i], name, name_length) == 0)
297 /* initialize a lambda vector */
298 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
300 snew(lv->val, lc->N);
306 static void lambda_vec_destroy(lambda_vec_t *lv)
311 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
315 lambda_vec_init(lv, orig->lc);
316 lv->dhdl = orig->dhdl;
317 lv->index = orig->index;
318 for (i = 0; i < lv->lc->N; i++)
320 lv->val[i] = orig->val[i];
324 /* write a lambda vec to a preallocated string */
325 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
330 str[0] = 0; /* reset the string */
335 str += sprintf(str, "delta H to ");
339 str += sprintf(str, "(");
341 for (i = 0; i < lv->lc->N; i++)
343 str += sprintf(str, "%g", lv->val[i]);
346 str += sprintf(str, ", ");
351 str += sprintf(str, ")");
356 /* this lambda vector describes a derivative */
357 str += sprintf(str, "dH/dl");
358 if (strlen(lv->lc->names[lv->dhdl]) > 0)
360 str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
365 /* write a shortened version of the lambda vec to a preallocated string */
366 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
373 sprintf(str, "%6d", lv->index);
379 sprintf(str, "%6.3f", lv->val[0]);
383 sprintf(str, "dH/dl[%d]", lv->dhdl);
388 /* write an intermediate version of two lambda vecs to a preallocated string */
389 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
390 const lambda_vec_t *b, char *str)
396 if ( (a->index >= 0) && (b->index >= 0) )
398 sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
402 if ( (a->dhdl < 0) && (b->dhdl < 0) )
404 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
411 /* calculate the difference in lambda vectors: c = a-b.
412 c must be initialized already, and a and b must describe non-derivative
414 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
419 if ( (a->dhdl > 0) || (b->dhdl > 0) )
422 "Trying to calculate the difference between derivatives instead of lambda points");
424 if ((a->lc != b->lc) || (a->lc != c->lc) )
427 "Trying to calculate the difference lambdas with differing basis set");
429 for (i = 0; i < a->lc->N; i++)
431 c->val[i] = a->val[i] - b->val[i];
435 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
436 a and b must describe non-derivative lambda points */
437 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
442 if ( (a->dhdl > 0) || (b->dhdl > 0) )
445 "Trying to calculate the difference between derivatives instead of lambda points");
450 "Trying to calculate the difference lambdas with differing basis set");
452 for (i = 0; i < a->lc->N; i++)
454 double df = a->val[i] - b->val[i];
461 /* check whether two lambda vectors are the same */
462 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
472 for (i = 0; i < a->lc->N; i++)
474 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
483 /* they're derivatives, so we check whether the indices match */
484 return (a->dhdl == b->dhdl);
488 /* Compare the sort order of two foreign lambda vectors
490 returns 1 if a is 'bigger' than b,
491 returns 0 if they're the same,
492 returns -1 if a is 'smaller' than b.*/
493 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
494 const lambda_vec_t *b)
497 double norm_a = 0, norm_b = 0;
498 gmx_bool different = FALSE;
502 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
504 /* if either one has an index we sort based on that */
505 if ((a->index >= 0) || (b->index >= 0))
507 if (a->index == b->index)
511 return (a->index > b->index) ? 1 : -1;
513 if (a->dhdl >= 0 || b->dhdl >= 0)
515 /* lambda vectors that are derivatives always sort higher than those
516 without derivatives */
517 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
519 return (a->dhdl >= 0) ? 1 : -1;
521 return a->dhdl > b->dhdl;
524 /* neither has an index, so we can only sort on the lambda components,
525 which is only valid if there is one component */
526 for (i = 0; i < a->lc->N; i++)
528 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
532 norm_a += a->val[i]*a->val[i];
533 norm_b += b->val[i]*b->val[i];
539 return norm_a > norm_b;
542 /* Compare the sort order of two native lambda vectors
544 returns 1 if a is 'bigger' than b,
545 returns 0 if they're the same,
546 returns -1 if a is 'smaller' than b.*/
547 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
548 const lambda_vec_t *b)
554 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
556 /* if either one has an index we sort based on that */
557 if ((a->index >= 0) || (b->index >= 0))
559 if (a->index == b->index)
563 return (a->index > b->index) ? 1 : -1;
565 /* neither has an index, so we can only sort on the lambda components,
566 which is only valid if there is one component */
570 "Can't compare lambdas with no index and > 1 component");
572 if (a->dhdl >= 0 || b->dhdl >= 0)
575 "Can't compare native lambdas that are derivatives");
577 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
581 return a->val[0] > b->val[0] ? 1 : -1;
587 static void hist_init(hist_t *h, int nhist, int *nbin)
592 gmx_fatal(FARGS, "histogram with more than two sets of data!");
594 for (i = 0; i < nhist; i++)
596 snew(h->bin[i], nbin[i]);
598 h->nbin[i] = nbin[i];
599 h->start_time = h->delta_time = 0;
606 static void hist_destroy(hist_t *h)
612 static void xvg_init(xvg_t *ba)
621 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
622 lambda_vec_t *foreign_lambda, double temp,
623 gmx_bool derivative, const char *filename)
625 s->native_lambda = native_lambda;
626 s->foreign_lambda = foreign_lambda;
628 s->derivative = derivative;
633 s->start_time = s->delta_time = 0;
637 s->hist_alloc = NULL;
642 s->filename = filename;
645 static void sample_range_init(sample_range_t *r, samples_t *s)
653 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
654 lambda_vec_t *foreign_lambda, double temp)
656 sc->native_lambda = native_lambda;
657 sc->foreign_lambda = foreign_lambda;
663 sc->nsamples_alloc = 0;
666 sc->next = sc->prev = NULL;
669 static void sample_coll_destroy(sample_coll_t *sc)
671 /* don't free the samples themselves */
677 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
680 l->lambda = native_lambda;
686 l->sc = &(l->sc_head);
688 sample_coll_init(l->sc, native_lambda, NULL, 0.);
693 static void barres_init(barres_t *br)
702 br->dg_stddev_err = 0;
709 /* calculate the total number of samples in a sample collection */
710 static void sample_coll_calc_ntot(sample_coll_t *sc)
715 for (i = 0; i < sc->nsamples; i++)
721 sc->ntot += sc->s[i]->ntot;
725 sc->ntot += sc->r[i].end - sc->r[i].start;
732 /* find the barsamples_t associated with a lambda that corresponds to
733 a specific foreign lambda */
734 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
735 lambda_vec_t *foreign_lambda)
737 sample_coll_t *sc = l->sc->next;
741 if (lambda_vec_same(sc->foreign_lambda, foreign_lambda))
751 /* insert li into an ordered list of lambda_colls */
752 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
754 sample_coll_t *scn = l->sc->next;
755 while ( (scn != l->sc) )
757 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
763 /* now insert it before the found scn */
765 sc->prev = scn->prev;
766 scn->prev->next = sc;
770 /* insert li into an ordered list of lambdas */
771 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
773 lambda_data_t *lc = head->next;
776 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
782 /* now insert ourselves before the found lc */
789 /* insert a sample and a sample_range into a sample_coll. The
790 samples are stored as a pointer, the range is copied. */
791 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
794 /* first check if it belongs here */
795 if (sc->temp != s->temp)
797 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
798 s->filename, sc->next->s[0]->filename);
800 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
802 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
803 s->filename, sc->next->s[0]->filename);
805 if (!lambda_vec_same(sc->foreign_lambda, s->foreign_lambda))
807 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
808 s->filename, sc->next->s[0]->filename);
811 /* check if there's room */
812 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
814 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
815 srenew(sc->s, sc->nsamples_alloc);
816 srenew(sc->r, sc->nsamples_alloc);
818 sc->s[sc->nsamples] = s;
819 sc->r[sc->nsamples] = *r;
822 sample_coll_calc_ntot(sc);
825 /* insert a sample into a lambda_list, creating the right sample_coll if
827 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
829 gmx_bool found = FALSE;
833 lambda_data_t *l = head->next;
835 /* first search for the right lambda_data_t */
838 if (lambda_vec_same(l->lambda, s->native_lambda) )
848 snew(l, 1); /* allocate a new one */
849 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
850 lambda_data_insert_lambda(head, l); /* add it to the list */
853 /* now look for a sample collection */
854 sc = lambda_data_find_sample_coll(l, s->foreign_lambda);
857 snew(sc, 1); /* allocate a new one */
858 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
859 lambda_data_insert_sample_coll(l, sc);
862 /* now insert the samples into the sample coll */
863 sample_range_init(&r, s);
864 sample_coll_insert_sample(sc, s, &r);
868 /* make a histogram out of a sample collection */
869 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
870 int *nbin_alloc, int *nbin,
871 double *dx, double *xmin, int nbin_default)
874 gmx_bool dx_set = FALSE;
875 gmx_bool xmin_set = FALSE;
877 gmx_bool xmax_set = FALSE;
878 gmx_bool xmax_set_hard = FALSE; /* whether the xmax is bounded by the
879 limits of a histogram */
882 /* first determine dx and xmin; try the histograms */
883 for (i = 0; i < sc->nsamples; i++)
887 hist_t *hist = sc->s[i]->hist;
888 for (k = 0; k < hist->nhist; k++)
890 double hdx = hist->dx[k];
891 double xmax_now = (hist->x0[k]+hist->nbin[k])*hdx;
893 /* we use the biggest dx*/
894 if ( (!dx_set) || hist->dx[0] > *dx)
899 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
902 *xmin = (hist->x0[k]*hdx);
905 if ( (!xmax_set) || (xmax_now > xmax && !xmax_set_hard) )
909 if (hist->bin[k][hist->nbin[k]-1] != 0)
911 xmax_set_hard = TRUE;
914 if (hist->bin[k][hist->nbin[k]-1] != 0 && (xmax_now < xmax) )
916 xmax_set_hard = TRUE;
922 /* and the delta us */
923 for (i = 0; i < sc->nsamples; i++)
925 if (sc->s[i]->ndu > 0)
927 /* determine min and max */
928 int starti = sc->r[i].start;
929 int endi = sc->r[i].end;
930 double du_xmin = sc->s[i]->du[starti];
931 double du_xmax = sc->s[i]->du[starti];
932 for (j = starti+1; j < endi; j++)
934 if (sc->s[i]->du[j] < du_xmin)
936 du_xmin = sc->s[i]->du[j];
938 if (sc->s[i]->du[j] > du_xmax)
940 du_xmax = sc->s[i]->du[j];
944 /* and now change the limits */
945 if ( (!xmin_set) || (du_xmin < *xmin) )
950 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
958 if (!xmax_set || !xmin_set)
967 *nbin = nbin_default;
968 *dx = (xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
969 be 0, and we count from 0 */
973 *nbin = (xmax-(*xmin))/(*dx);
976 if (*nbin > *nbin_alloc)
979 srenew(*bin, *nbin_alloc);
982 /* reset the histogram */
983 for (i = 0; i < (*nbin); i++)
988 /* now add the actual data */
989 for (i = 0; i < sc->nsamples; i++)
993 hist_t *hist = sc->s[i]->hist;
994 for (k = 0; k < hist->nhist; k++)
996 double hdx = hist->dx[k];
997 double xmin_hist = hist->x0[k]*hdx;
998 for (j = 0; j < hist->nbin[k]; j++)
1000 /* calculate the bin corresponding to the middle of the
1002 double x = hdx*(j+0.5) + xmin_hist;
1003 int binnr = (int)((x-(*xmin))/(*dx));
1005 if (binnr >= *nbin || binnr < 0)
1010 (*bin)[binnr] += hist->bin[k][j];
1016 int starti = sc->r[i].start;
1017 int endi = sc->r[i].end;
1018 for (j = starti; j < endi; j++)
1020 int binnr = (int)((sc->s[i]->du[j]-(*xmin))/(*dx));
1021 if (binnr >= *nbin || binnr < 0)
1032 /* write a collection of histograms to a file */
1033 void sim_data_histogram(sim_data_t *sd, const char *filename,
1034 int nbin_default, const output_env_t oenv)
1036 char label_x[STRLEN];
1037 const char *dhdl = "dH/d\\lambda", *deltag = "\\DeltaH", *lambda = "\\lambda";
1038 const char *title = "N(\\DeltaH)";
1039 const char *label_y = "Samples";
1043 char **setnames = NULL;
1044 gmx_bool first_set = FALSE;
1045 /* histogram data: */
1052 lambda_data_t *bl_head = sd->lb;
1054 printf("\nWriting histogram to %s\n", filename);
1055 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1057 fp = xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1059 /* first get all the set names */
1061 /* iterate over all lambdas */
1062 while (bl != bl_head)
1064 sample_coll_t *sc = bl->sc->next;
1066 /* iterate over all samples */
1067 while (sc != bl->sc)
1069 char buf[STRLEN], buf2[STRLEN];
1072 srenew(setnames, nsets);
1073 snew(setnames[nsets-1], STRLEN);
1074 if (sc->foreign_lambda->dhdl < 0)
1076 lambda_vec_print(sc->native_lambda, buf, FALSE);
1077 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1078 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1079 deltag, lambda, buf2, lambda, buf);
1083 lambda_vec_print(sc->native_lambda, buf, FALSE);
1084 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1092 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1095 /* now make the histograms */
1097 /* iterate over all lambdas */
1098 while (bl != bl_head)
1100 sample_coll_t *sc = bl->sc->next;
1102 /* iterate over all samples */
1103 while (sc != bl->sc)
1107 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1110 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1113 for (i = 0; i < nbin; i++)
1115 double xmin = i*dx + min;
1116 double xmax = (i+1)*dx + min;
1118 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1136 /* create a collection (array) of barres_t object given a ordered linked list
1137 of barlamda_t sample collections */
1138 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1145 gmx_bool dhdl = FALSE;
1146 gmx_bool first = TRUE;
1147 lambda_data_t *bl_head = sd->lb;
1149 /* first count the lambdas */
1151 while (bl != bl_head)
1156 snew(res, nlambda-1);
1158 /* next put the right samples in the res */
1160 bl = bl_head->next->next; /* we start with the second one. */
1161 while (bl != bl_head)
1163 sample_coll_t *sc, *scprev;
1164 barres_t *br = &(res[*nres]);
1165 /* there is always a previous one. we search for that as a foreign
1167 scprev = lambda_data_find_sample_coll(bl->prev, bl->lambda);
1168 sc = lambda_data_find_sample_coll(bl, bl->prev->lambda);
1176 scprev = lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1177 sc = lambda_data_find_sample_coll(bl, bl->lambda);
1181 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1186 gmx_fatal(FARGS, "Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1189 else if (!scprev && !sc)
1191 gmx_fatal(FARGS, "There is no path from lambda=%f -> %f that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1194 /* normal delta H */
1197 gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->lambda, bl->prev->lambda);
1201 gmx_fatal(FARGS, "Could not find a set for foreign lambda = %f\nin the files for lambda = %f", bl->prev->lambda, bl->lambda);
1213 /* estimate the maximum discretization error */
1214 static double barres_list_max_disc_err(barres_t *res, int nres)
1217 double disc_err = 0.;
1218 double delta_lambda;
1220 for (i = 0; i < nres; i++)
1222 barres_t *br = &(res[i]);
1224 delta_lambda = lambda_vec_abs_diff(br->b->native_lambda,
1225 br->a->native_lambda);
1227 for (j = 0; j < br->a->nsamples; j++)
1229 if (br->a->s[j]->hist)
1232 if (br->a->s[j]->derivative)
1234 Wfac = delta_lambda;
1237 disc_err = max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1240 for (j = 0; j < br->b->nsamples; j++)
1242 if (br->b->s[j]->hist)
1245 if (br->b->s[j]->derivative)
1247 Wfac = delta_lambda;
1249 disc_err = max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1257 /* impose start and end times on a sample collection, updating sample_ranges */
1258 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1262 for (i = 0; i < sc->nsamples; i++)
1264 samples_t *s = sc->s[i];
1265 sample_range_t *r = &(sc->r[i]);
1268 double end_time = s->hist->delta_time*s->hist->sum +
1269 s->hist->start_time;
1270 if (s->hist->start_time < begin_t || end_time > end_t)
1280 if (s->start_time < begin_t)
1282 r->start = (int)((begin_t - s->start_time)/s->delta_time);
1284 end_time = s->delta_time*s->ndu + s->start_time;
1285 if (end_time > end_t)
1287 r->end = (int)((end_t - s->start_time)/s->delta_time);
1293 for (j = 0; j < s->ndu; j++)
1295 if (s->t[j] < begin_t)
1300 if (s->t[j] >= end_t)
1307 if (r->start > r->end)
1313 sample_coll_calc_ntot(sc);
1316 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1318 double first_t, last_t;
1319 double begin_t, end_t;
1321 lambda_data_t *head = sd->lb;
1324 if (begin <= 0 && end < 0)
1329 /* first determine the global start and end times */
1335 sample_coll_t *sc = lc->sc->next;
1336 while (sc != lc->sc)
1338 for (j = 0; j < sc->nsamples; j++)
1340 double start_t, end_t;
1342 start_t = sc->s[j]->start_time;
1343 end_t = sc->s[j]->start_time;
1346 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1352 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1356 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1360 if (start_t < first_t || first_t < 0)
1374 /* calculate the actual times */
1392 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1394 if (begin_t > end_t)
1398 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1400 /* then impose them */
1404 sample_coll_t *sc = lc->sc->next;
1405 while (sc != lc->sc)
1407 sample_coll_impose_times(sc, begin_t, end_t);
1415 /* create subsample i out of ni from an existing sample_coll */
1416 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1417 sample_coll_t *sc_orig,
1421 int hist_start, hist_end;
1423 gmx_large_int_t ntot_start;
1424 gmx_large_int_t ntot_end;
1425 gmx_large_int_t ntot_so_far;
1427 *sc = *sc_orig; /* just copy all fields */
1429 /* allocate proprietary memory */
1430 snew(sc->s, sc_orig->nsamples);
1431 snew(sc->r, sc_orig->nsamples);
1433 /* copy the samples */
1434 for (j = 0; j < sc_orig->nsamples; j++)
1436 sc->s[j] = sc_orig->s[j];
1437 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1440 /* now fix start and end fields */
1441 /* the casts avoid possible overflows */
1442 ntot_start = (gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1443 ntot_end = (gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1445 for (j = 0; j < sc->nsamples; j++)
1447 gmx_large_int_t ntot_add;
1448 gmx_large_int_t new_start, new_end;
1454 ntot_add = sc->s[j]->hist->sum;
1458 ntot_add = sc->r[j].end - sc->r[j].start;
1466 if (!sc->s[j]->hist)
1468 if (ntot_so_far < ntot_start)
1470 /* adjust starting point */
1471 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1475 new_start = sc->r[j].start;
1477 /* adjust end point */
1478 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1479 if (new_end > sc->r[j].end)
1481 new_end = sc->r[j].end;
1484 /* check if we're in range at all */
1485 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1490 /* and write the new range */
1491 sc->r[j].start = (int)new_start;
1492 sc->r[j].end = (int)new_end;
1499 double ntot_start_norm, ntot_end_norm;
1500 /* calculate the amount of overlap of the
1501 desired range (ntot_start -- ntot_end) onto
1502 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1504 /* first calculate normalized bounds
1505 (where 0 is the start of the hist range, and 1 the end) */
1506 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1507 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1509 /* now fix the boundaries */
1510 ntot_start_norm = min(1, max(0., ntot_start_norm));
1511 ntot_end_norm = max(0, min(1., ntot_end_norm));
1513 /* and calculate the overlap */
1514 overlap = ntot_end_norm - ntot_start_norm;
1516 if (overlap > 0.95) /* we allow for 5% slack */
1518 sc->r[j].use = TRUE;
1520 else if (overlap < 0.05)
1522 sc->r[j].use = FALSE;
1530 ntot_so_far += ntot_add;
1532 sample_coll_calc_ntot(sc);
1537 /* calculate minimum and maximum work values in sample collection */
1538 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1539 double *Wmin, double *Wmax)
1546 for (i = 0; i < sc->nsamples; i++)
1548 samples_t *s = sc->s[i];
1549 sample_range_t *r = &(sc->r[i]);
1554 for (j = r->start; j < r->end; j++)
1556 *Wmin = min(*Wmin, s->du[j]*Wfac);
1557 *Wmax = max(*Wmax, s->du[j]*Wfac);
1562 int hd = 0; /* determine the histogram direction: */
1564 if ( (s->hist->nhist > 1) && (Wfac < 0) )
1568 dx = s->hist->dx[hd];
1570 for (j = s->hist->nbin[hd]-1; j >= 0; j--)
1572 *Wmin = min(*Wmin, Wfac*(s->hist->x0[hd])*dx);
1573 *Wmax = max(*Wmax, Wfac*(s->hist->x0[hd])*dx);
1574 /* look for the highest value bin with values */
1575 if (s->hist->bin[hd][j] > 0)
1577 *Wmin = min(*Wmin, Wfac*(j+s->hist->x0[hd]+1)*dx);
1578 *Wmax = max(*Wmax, Wfac*(j+s->hist->x0[hd]+1)*dx);
1587 /* Initialize a sim_data structure */
1588 static void sim_data_init(sim_data_t *sd)
1590 /* make linked list */
1591 sd->lb = &(sd->lb_head);
1592 sd->lb->next = sd->lb;
1593 sd->lb->prev = sd->lb;
1595 lambda_components_init(&(sd->lc));
1599 static double calc_bar_sum(int n, const double *W, double Wfac, double sbMmDG)
1606 for (i = 0; i < n; i++)
1608 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1614 /* calculate the BAR average given a histogram
1616 if type== 0, calculate the best estimate for the average,
1617 if type==-1, calculate the minimum possible value given the histogram
1618 if type== 1, calculate the maximum possible value given the histogram */
1619 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1625 /* normalization factor multiplied with bin width and
1626 number of samples (we normalize through M): */
1628 int hd = 0; /* determine the histogram direction: */
1631 if ( (hist->nhist > 1) && (Wfac < 0) )
1636 max = hist->nbin[hd]-1;
1639 max = hist->nbin[hd]; /* we also add whatever was out of range */
1642 for (i = 0; i < max; i++)
1644 double x = Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1645 double pxdx = hist->bin[0][i]*normdx; /* p(x)dx */
1647 sum += pxdx/(1. + exp(x + sbMmDG));
1653 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1654 double temp, double tol, int type)
1659 double Wfac1, Wfac2, Wmin, Wmax;
1660 double DG0, DG1, DG2, dDG1;
1662 double n1, n2; /* numbers of samples as doubles */
1667 /* count the numbers of samples */
1673 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1674 if (ca->foreign_lambda->dhdl < 0)
1676 /* this is the case when the delta U were calculated directly
1677 (i.e. we're not scaling dhdl) */
1683 /* we're using dhdl, so delta_lambda needs to be a
1684 multiplication factor. */
1685 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1686 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1688 if (cb->native_lambda->lc->N > 1)
1691 "Can't (yet) do multi-component dhdl interpolation");
1694 Wfac1 = beta*delta_lambda;
1695 Wfac2 = -beta*delta_lambda;
1700 /* We print the output both in kT and kJ/mol.
1701 * Here we determine DG in kT, so when beta < 1
1702 * the precision has to be increased.
1707 /* Calculate minimum and maximum work to give an initial estimate of
1708 * delta G as their average.
1711 double Wmin1, Wmin2, Wmax1, Wmax2;
1712 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1713 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1715 Wmin = min(Wmin1, Wmin2);
1716 Wmax = max(Wmax1, Wmax2);
1724 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1726 /* We approximate by bisection: given our initial estimates
1727 we keep checking whether the halfway point is greater or
1728 smaller than what we get out of the BAR averages.
1730 For the comparison we can use twice the tolerance. */
1731 while (DG2 - DG0 > 2*tol)
1733 DG1 = 0.5*(DG0 + DG2);
1735 /* calculate the BAR averages */
1738 for (i = 0; i < ca->nsamples; i++)
1740 samples_t *s = ca->s[i];
1741 sample_range_t *r = &(ca->r[i]);
1746 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1750 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1755 for (i = 0; i < cb->nsamples; i++)
1757 samples_t *s = cb->s[i];
1758 sample_range_t *r = &(cb->r[i]);
1763 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1767 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1783 fprintf(debug, "DG %9.5f %9.5f\n", DG0, DG2);
1787 return 0.5*(DG0 + DG2);
1790 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1791 double temp, double dg, double *sa, double *sb)
1797 double Wfac1, Wfac2;
1803 /* count the numbers of samples */
1807 /* to ensure the work values are the same as during the delta_G */
1808 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1809 if (ca->foreign_lambda->dhdl < 0)
1811 /* this is the case when the delta U were calculated directly
1812 (i.e. we're not scaling dhdl) */
1818 /* we're using dhdl, so delta_lambda needs to be a
1819 multiplication factor. */
1820 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1822 Wfac1 = beta*delta_lambda;
1823 Wfac2 = -beta*delta_lambda;
1826 /* first calculate the average work in both directions */
1827 for (i = 0; i < ca->nsamples; i++)
1829 samples_t *s = ca->s[i];
1830 sample_range_t *r = &(ca->r[i]);
1835 for (j = r->start; j < r->end; j++)
1837 W_ab += Wfac1*s->du[j];
1842 /* normalization factor multiplied with bin width and
1843 number of samples (we normalize through M): */
1846 int hd = 0; /* histogram direction */
1847 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1851 dx = s->hist->dx[hd];
1853 for (j = 0; j < s->hist->nbin[0]; j++)
1855 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1856 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1864 for (i = 0; i < cb->nsamples; i++)
1866 samples_t *s = cb->s[i];
1867 sample_range_t *r = &(cb->r[i]);
1872 for (j = r->start; j < r->end; j++)
1874 W_ba += Wfac1*s->du[j];
1879 /* normalization factor multiplied with bin width and
1880 number of samples (we normalize through M): */
1883 int hd = 0; /* histogram direction */
1884 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
1888 dx = s->hist->dx[hd];
1890 for (j = 0; j < s->hist->nbin[0]; j++)
1892 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1893 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1901 /* then calculate the relative entropies */
1906 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1907 double temp, double dg, double *stddev)
1911 double sigmafact = 0.;
1913 double Wfac1, Wfac2;
1919 /* count the numbers of samples */
1923 /* to ensure the work values are the same as during the delta_G */
1924 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1925 if (ca->foreign_lambda->dhdl < 0)
1927 /* this is the case when the delta U were calculated directly
1928 (i.e. we're not scaling dhdl) */
1934 /* we're using dhdl, so delta_lambda needs to be a
1935 multiplication factor. */
1936 double delta_lambda = lambda_vec_abs_diff(cb->native_lambda,
1938 Wfac1 = beta*delta_lambda;
1939 Wfac2 = -beta*delta_lambda;
1945 /* calculate average in both directions */
1946 for (i = 0; i < ca->nsamples; i++)
1948 samples_t *s = ca->s[i];
1949 sample_range_t *r = &(ca->r[i]);
1954 for (j = r->start; j < r->end; j++)
1956 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1961 /* normalization factor multiplied with bin width and
1962 number of samples (we normalize through M): */
1965 int hd = 0; /* histogram direction */
1966 if ( (s->hist->nhist > 1) && (Wfac1 < 0) )
1970 dx = s->hist->dx[hd];
1972 for (j = 0; j < s->hist->nbin[0]; j++)
1974 double x = Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1975 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
1977 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1982 for (i = 0; i < cb->nsamples; i++)
1984 samples_t *s = cb->s[i];
1985 sample_range_t *r = &(cb->r[i]);
1990 for (j = r->start; j < r->end; j++)
1992 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1997 /* normalization factor multiplied with bin width and
1998 number of samples (we normalize through M): */
2001 int hd = 0; /* histogram direction */
2002 if ( (s->hist->nhist > 1) && (Wfac2 < 0) )
2006 dx = s->hist->dx[hd];
2008 for (j = 0; j < s->hist->nbin[0]; j++)
2010 double x = Wfac2*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
2011 double pxdx = s->hist->bin[0][j]*normdx; /* p(x)dx */
2013 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
2019 sigmafact /= (n1 + n2);
2023 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
2024 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
2029 static void calc_bar(barres_t *br, double tol,
2030 int npee_min, int npee_max, gmx_bool *bEE,
2034 double dg_sig2, sa_sig2, sb_sig2, stddev_sig2; /* intermediate variance values
2035 for calculated quantities */
2036 int nsample1, nsample2;
2037 double temp = br->a->temp;
2039 double dg_min, dg_max;
2040 gmx_bool have_hist = FALSE;
2042 br->dg = calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2044 br->dg_disc_err = 0.;
2045 br->dg_histrange_err = 0.;
2047 /* check if there are histograms */
2048 for (i = 0; i < br->a->nsamples; i++)
2050 if (br->a->r[i].use && br->a->s[i]->hist)
2058 for (i = 0; i < br->b->nsamples; i++)
2060 if (br->b->r[i].use && br->b->s[i]->hist)
2068 /* calculate histogram-specific errors */
2071 dg_min = calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2072 dg_max = calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2074 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2076 /* the histogram range error is the biggest of the differences
2077 between the best estimate and the extremes */
2078 br->dg_histrange_err = fabs(dg_max - dg_min);
2080 br->dg_disc_err = 0.;
2081 for (i = 0; i < br->a->nsamples; i++)
2083 if (br->a->s[i]->hist)
2085 br->dg_disc_err = max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2088 for (i = 0; i < br->b->nsamples; i++)
2090 if (br->b->s[i]->hist)
2092 br->dg_disc_err = max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2096 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2098 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2107 sample_coll_t ca, cb;
2109 /* initialize the samples */
2110 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2112 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2115 for (npee = npee_min; npee <= npee_max; npee++)
2124 double dstddev2 = 0;
2127 for (p = 0; p < npee; p++)
2134 cac = sample_coll_create_subsample(&ca, br->a, p, npee);
2135 cbc = sample_coll_create_subsample(&cb, br->b, p, npee);
2139 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2143 sample_coll_destroy(&ca);
2147 sample_coll_destroy(&cb);
2152 dgp = calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2156 partsum[npee*(npee_max+1)+p] += dgp;
2158 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2163 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2166 dstddev2 += stddevc*stddevc;
2168 sample_coll_destroy(&ca);
2169 sample_coll_destroy(&cb);
2173 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2179 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2180 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2184 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2186 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
2187 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
2188 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
2189 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2194 static double bar_err(int nbmin, int nbmax, const double *partsum)
2197 double svar, s, s2, dg;
2200 for (nb = nbmin; nb <= nbmax; nb++)
2204 for (b = 0; b < nb; b++)
2206 dg = partsum[nb*(nbmax+1)+b];
2212 svar += (s2 - s*s)/(nb - 1);
2215 return sqrt(svar/(nbmax + 1 - nbmin));
2219 /* Seek the end of an identifier (consecutive non-spaces), followed by
2220 an optional number of spaces or '='-signs. Returns a pointer to the
2221 first non-space value found after that. Returns NULL if the string
2224 static const char *find_value(const char *str)
2226 gmx_bool name_end_found = FALSE;
2228 /* if the string is a NULL pointer, return a NULL pointer. */
2233 while (*str != '\0')
2235 /* first find the end of the name */
2236 if (!name_end_found)
2238 if (isspace(*str) || (*str == '=') )
2240 name_end_found = TRUE;
2245 if (!( isspace(*str) || (*str == '=') ))
2257 /* read a vector-notation description of a lambda vector */
2258 static gmx_bool read_lambda_compvec(const char *str,
2260 const lambda_components_t *lc_in,
2261 lambda_components_t *lc_out,
2265 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2266 components, or to check them */
2267 gmx_bool start_reached = FALSE; /* whether the start of component names
2269 gmx_bool vector = FALSE; /* whether there are multiple components */
2270 int n = 0; /* current component number */
2271 const char *val_start = NULL; /* start of the component name, or NULL
2272 if not in a value */
2282 if (lc_out && lc_out->N == 0)
2284 initialize_lc = TRUE;
2299 start_reached = TRUE;
2302 else if (*str == '(')
2305 start_reached = TRUE;
2307 else if (!isspace(*str))
2309 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2316 if (isspace(*str) || *str == ')' || *str == ',' || *str == '\0')
2323 lambda_components_add(lc_out, val_start,
2328 if (!lambda_components_check(lc_out, n, val_start,
2337 /* add a vector component to lv */
2338 lv->val[n] = strtod(val_start, &strtod_end);
2339 if (val_start == strtod_end)
2342 "Error reading lambda vector in %s", fn);
2345 /* reset for the next identifier */
2354 else if (isalnum(*str))
2367 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2375 else if (lv == NULL)
2381 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2401 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2407 /* read and check the component names from a string */
2408 static gmx_bool read_lambda_components(const char *str,
2409 lambda_components_t *lc,
2413 return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2416 /* read an initialized lambda vector from a string */
2417 static gmx_bool read_lambda_vector(const char *str,
2422 return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2427 /* deduce lambda value from legend.
2429 legend = the legend string
2431 lam = the initialized lambda vector
2432 returns whether to use the data in this set.
2434 static gmx_bool legend2lambda(const char *fn,
2440 const char *ptr = NULL, *ptr2 = NULL;
2441 gmx_bool ok = FALSE;
2442 gmx_bool bdhdl = FALSE;
2443 const char *tostr = " to ";
2447 gmx_fatal(FARGS, "There is no legend in file '%s', can not deduce lambda", fn);
2450 /* look for the last 'to': */
2454 ptr2 = strstr(ptr2, tostr);
2461 while (ptr2 != NULL && *ptr2 != '\0');
2465 ptr += strlen(tostr)-1; /* and advance past that 'to' */
2469 /* look for the = sign */
2470 ptr = strrchr(legend, '=');
2473 /* otherwise look for the last space */
2474 ptr = strrchr(legend, ' ');
2478 if (strstr(legend, "dH"))
2483 else if (strchr(legend, 'D') != NULL && strchr(legend, 'H') != NULL)
2488 else /*if (strstr(legend, "pV"))*/
2499 gmx_fatal(FARGS, "There is no proper lambda legend in file '%s', can not deduce lambda", fn);
2503 ptr = find_value(ptr);
2504 if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2506 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2515 ptr = strrchr(legend, '=');
2519 /* there must be a component name */
2523 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2525 /* now backtrack to the start of the identifier */
2526 while (isspace(*ptr))
2532 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2535 while (!isspace(*ptr))
2540 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2544 strncpy(buf, ptr, (end-ptr));
2545 buf[(end-ptr)] = '\0';
2546 dhdl_index = lambda_components_find(lam->lc, ptr, (end-ptr));
2550 strncpy(buf, ptr, (end-ptr));
2551 buf[(end-ptr)] = '\0';
2553 "Did not find lambda component for '%s' in %s",
2562 "dhdl without component name with >1 lambda component in %s",
2567 lam->dhdl = dhdl_index;
2572 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2573 lambda_components_t *lc)
2578 double native_lambda;
2582 /* first check for a state string */
2583 ptr = strstr(subtitle, "state");
2587 const char *val_end;
2589 /* the new 4.6 style lambda vectors */
2590 ptr = find_value(ptr);
2593 index = strtol(ptr, &end, 10);
2596 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2603 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2606 /* now find the lambda vector component names */
2607 while (*ptr != '(' && !isalnum(*ptr))
2613 "Incomplete lambda vector component data in %s", fn);
2618 if (!read_lambda_components(ptr, lc, &val_end, fn))
2621 "lambda vector components in %s don't match those previously read",
2624 ptr = find_value(val_end);
2627 gmx_fatal(FARGS, "Incomplete state data in %s", fn);
2630 lambda_vec_init(&(ba->native_lambda), lc);
2631 if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2633 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2635 ba->native_lambda.index = index;
2640 /* compatibility mode: check for lambda in other ways. */
2641 /* plain text lambda string */
2642 ptr = strstr(subtitle, "lambda");
2645 /* xmgrace formatted lambda string */
2646 ptr = strstr(subtitle, "\\xl\\f{}");
2650 /* xmgr formatted lambda string */
2651 ptr = strstr(subtitle, "\\8l\\4");
2655 ptr = strstr(ptr, "=");
2659 bFound = (sscanf(ptr+1, "%lf", &(native_lambda)) == 1);
2660 /* add the lambda component name as an empty string */
2663 if (!lambda_components_check(lc, 0, "", 0))
2666 "lambda vector components in %s don't match those previously read",
2672 lambda_components_add(lc, "", 0);
2674 lambda_vec_init(&(ba->native_lambda), lc);
2675 ba->native_lambda.val[0] = native_lambda;
2682 static void filename2lambda(const char *fn, xvg_t *ba)
2685 const char *ptr, *digitptr;
2689 /* go to the end of the path string and search backward to find the last
2690 directory in the path which has to contain the value of lambda
2692 while (ptr[1] != '\0')
2696 /* searching backward to find the second directory separator */
2701 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2709 /* save the last position of a digit between the last two
2710 separators = in the last dirname */
2711 if (dirsep > 0 && isdigit(*ptr))
2719 gmx_fatal(FARGS, "While trying to read the lambda value from the file path:"
2720 " last directory in the path '%s' does not contain a number", fn);
2722 if (digitptr[-1] == '-')
2726 lambda = strtod(digitptr, &endptr);
2727 if (endptr == digitptr)
2729 gmx_fatal(FARGS, "Malformed number in file path '%s'", fn);
2733 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2734 lambda_components_t *lc)
2737 char *subtitle, **legend, *ptr;
2739 gmx_bool native_lambda_read = FALSE;
2747 np = read_xvg_legend(fn, &ba->y, &ba->nset, &subtitle, &legend);
2750 gmx_fatal(FARGS, "File %s contains no usable data.", fn);
2752 /* Reorder the data */
2754 for (i = 1; i < ba->nset; i++)
2756 ba->y[i-1] = ba->y[i];
2760 snew(ba->np, ba->nset);
2761 for (i = 0; i < ba->nset; i++)
2767 if (subtitle != NULL)
2769 /* try to extract temperature */
2770 ptr = strstr(subtitle, "T =");
2774 if (sscanf(ptr, "%lf", &ba->temp) == 1)
2778 gmx_fatal(FARGS, "Found temperature of %f in file '%s'",
2788 gmx_fatal(FARGS, "Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]", fn);
2793 /* Try to deduce lambda from the subtitle */
2796 if (subtitle2lambda(subtitle, ba, fn, lc))
2798 native_lambda_read = TRUE;
2801 snew(ba->lambda, ba->nset);
2804 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2807 if (!native_lambda_read)
2809 /* Deduce lambda from the file name */
2810 filename2lambda(fn, ba);
2811 native_lambda_read = TRUE;
2813 ba->lambda[0] = ba->native_lambda;
2817 gmx_fatal(FARGS, "File %s contains multiple sets but no legends, can not determine the lambda values", fn);
2822 for (i = 0; i < ba->nset; )
2824 gmx_bool use = FALSE;
2825 /* Read lambda from the legend */
2826 lambda_vec_init( &(ba->lambda[i]), lc );
2827 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2828 use = legend2lambda(fn, legend[i], ba, &(ba->lambda[i]));
2831 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2837 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2838 for (j = i+1; j < ba->nset; j++)
2840 ba->y[j-1] = ba->y[j];
2841 legend[j-1] = legend[j];
2848 if (!native_lambda_read)
2850 gmx_fatal(FARGS, "File %s contains multiple sets but no indication of the native lambda", fn);
2855 for (i = 0; i < ba->nset-1; i++)
2863 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2872 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2874 if (barsim->nset < 1)
2876 gmx_fatal(FARGS, "File '%s' contains fewer than two columns", fn);
2879 if (!gmx_within_tol(*temp, barsim->temp, GMX_FLOAT_EPS) && (*temp > 0) )
2881 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
2883 *temp = barsim->temp;
2885 /* now create a series of samples_t */
2886 snew(s, barsim->nset);
2887 for (i = 0; i < barsim->nset; i++)
2889 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2890 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2891 &(barsim->lambda[i])),
2893 s[i].du = barsim->y[i];
2894 s[i].ndu = barsim->np[i];
2897 lambda_data_list_insert_sample(sd->lb, s+i);
2902 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2903 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2904 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2905 for (i = 0; i < barsim->nset; i++)
2907 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2908 printf(" %s (%d pts)\n", buf, s[i].ndu);
2914 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2915 double start_time, double delta_time,
2916 lambda_vec_t *native_lambda, double temp,
2917 double *last_t, const char *filename)
2921 double old_foreign_lambda;
2922 lambda_vec_t *foreign_lambda;
2924 samples_t *s; /* convenience pointer */
2927 /* check the block types etc. */
2928 if ( (blk->nsub < 3) ||
2929 (blk->sub[0].type != xdr_datatype_int) ||
2930 (blk->sub[1].type != xdr_datatype_double) ||
2932 (blk->sub[2].type != xdr_datatype_float) &&
2933 (blk->sub[2].type != xdr_datatype_double)
2935 (blk->sub[0].nr < 1) ||
2936 (blk->sub[1].nr < 1) )
2939 "Unexpected/corrupted block data in file %s around time %f.",
2940 filename, start_time);
2943 snew(foreign_lambda, 1);
2944 lambda_vec_init(foreign_lambda, native_lambda->lc);
2945 lambda_vec_copy(foreign_lambda, native_lambda);
2946 type = blk->sub[0].ival[0];
2949 for (i = 0; i < native_lambda->lc->N; i++)
2951 foreign_lambda->val[i] = blk->sub[1].dval[i];
2956 if (blk->sub[0].nr > 1)
2958 foreign_lambda->dhdl = blk->sub[0].ival[1];
2962 foreign_lambda->dhdl = 0;
2968 /* initialize the samples structure if it's empty. */
2970 samples_init(*smp, native_lambda, foreign_lambda, temp,
2971 type == dhbtDHDL, filename);
2972 (*smp)->start_time = start_time;
2973 (*smp)->delta_time = delta_time;
2976 /* set convenience pointer */
2979 /* now double check */
2980 if (!lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2982 char buf[STRLEN], buf2[STRLEN];
2983 lambda_vec_print(foreign_lambda, buf, FALSE);
2984 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2985 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2986 gmx_fatal(FARGS, "Corrupted data in file %s around t=%f.",
2987 filename, start_time);
2990 /* make room for the data */
2991 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2993 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2994 blk->sub[2].nr*2 : s->ndu_alloc;
2995 srenew(s->du_alloc, s->ndu_alloc);
2996 s->du = s->du_alloc;
2999 s->ndu += blk->sub[2].nr;
3000 s->ntot += blk->sub[2].nr;
3001 *ndu = blk->sub[2].nr;
3003 /* and copy the data*/
3004 for (j = 0; j < blk->sub[2].nr; j++)
3006 if (blk->sub[2].type == xdr_datatype_float)
3008 s->du[startj+j] = blk->sub[2].fval[j];
3012 s->du[startj+j] = blk->sub[2].dval[j];
3015 if (start_time + blk->sub[2].nr*delta_time > *last_t)
3017 *last_t = start_time + blk->sub[2].nr*delta_time;
3021 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
3022 double start_time, double delta_time,
3023 lambda_vec_t *native_lambda, double temp,
3024 double *last_t, const char *filename)
3029 double old_foreign_lambda;
3030 lambda_vec_t *foreign_lambda;
3034 /* check the block types etc. */
3035 if ( (blk->nsub < 2) ||
3036 (blk->sub[0].type != xdr_datatype_double) ||
3037 (blk->sub[1].type != xdr_datatype_large_int) ||
3038 (blk->sub[0].nr < 2) ||
3039 (blk->sub[1].nr < 2) )
3042 "Unexpected/corrupted block data in file %s around time %f",
3043 filename, start_time);
3046 nhist = blk->nsub-2;
3054 "Unexpected/corrupted block data in file %s around time %f",
3055 filename, start_time);
3061 snew(foreign_lambda, 1);
3062 lambda_vec_init(foreign_lambda, native_lambda->lc);
3063 lambda_vec_copy(foreign_lambda, native_lambda);
3064 type = (int)(blk->sub[1].lval[1]);
3067 double old_foreign_lambda;
3069 old_foreign_lambda = blk->sub[0].dval[0];
3070 if (old_foreign_lambda >= 0)
3072 foreign_lambda->val[0] = old_foreign_lambda;
3073 if (foreign_lambda->lc->N > 1)
3076 "Single-component lambda in multi-component file %s",
3082 for (i = 0; i < native_lambda->lc->N; i++)
3084 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3090 if (foreign_lambda->lc->N > 1)
3092 if (blk->sub[1].nr < 3 + nhist)
3095 "Missing derivative coord in multi-component file %s",
3098 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3102 foreign_lambda->dhdl = 0;
3106 samples_init(s, native_lambda, foreign_lambda, temp, type == dhbtDHDL,
3110 for (i = 0; i < nhist; i++)
3112 nbins[i] = blk->sub[i+2].nr;
3115 hist_init(s->hist, nhist, nbins);
3117 for (i = 0; i < nhist; i++)
3119 s->hist->x0[i] = blk->sub[1].lval[2+i];
3120 s->hist->dx[i] = blk->sub[0].dval[1];
3123 s->hist->dx[i] = -s->hist->dx[i];
3127 s->hist->start_time = start_time;
3128 s->hist->delta_time = delta_time;
3129 s->start_time = start_time;
3130 s->delta_time = delta_time;
3132 for (i = 0; i < nhist; i++)
3135 gmx_large_int_t sum = 0;
3137 for (j = 0; j < s->hist->nbin[i]; j++)
3139 int binv = (int)(blk->sub[i+2].ival[j]);
3141 s->hist->bin[i][j] = binv;
3154 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3160 if (start_time + s->hist->sum*delta_time > *last_t)
3162 *last_t = start_time + s->hist->sum*delta_time;
3168 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3174 gmx_enxnm_t *enm = NULL;
3175 double first_t = -1;
3177 samples_t **samples_rawdh = NULL; /* contains samples for raw delta_h */
3178 int *nhists = NULL; /* array to keep count & print at end */
3179 int *npts = NULL; /* array to keep count & print at end */
3180 lambda_vec_t **lambdas = NULL; /* array to keep count & print at end */
3181 lambda_vec_t *native_lambda;
3182 double end_time; /* the end time of the last batch of samples */
3184 lambda_vec_t start_lambda;
3186 fp = open_enx(fn, "r");
3187 do_enxnms(fp, &nre, &enm);
3190 snew(native_lambda, 1);
3191 start_lambda.lc = NULL;
3193 while (do_enx(fp, fr))
3195 /* count the data blocks */
3196 int nblocks_raw = 0;
3197 int nblocks_hist = 0;
3200 /* DHCOLL block information: */
3201 double start_time = 0, delta_time = 0, old_start_lambda = 0, delta_lambda = 0;
3204 /* count the blocks and handle collection information: */
3205 for (i = 0; i < fr->nblock; i++)
3207 if (fr->block[i].id == enxDHHIST)
3211 if (fr->block[i].id == enxDH)
3215 if (fr->block[i].id == enxDHCOLL)
3218 if ( (fr->block[i].nsub < 1) ||
3219 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3220 (fr->block[i].sub[0].nr < 5))
3222 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3225 /* read the data from the DHCOLL block */
3226 rtemp = fr->block[i].sub[0].dval[0];
3227 start_time = fr->block[i].sub[0].dval[1];
3228 delta_time = fr->block[i].sub[0].dval[2];
3229 old_start_lambda = fr->block[i].sub[0].dval[3];
3230 delta_lambda = fr->block[i].sub[0].dval[4];
3232 if (delta_lambda > 0)
3234 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3236 if ( ( *temp != rtemp) && (*temp > 0) )
3238 gmx_fatal(FARGS, "Temperature in file %s different from earlier files or setting\n", fn);
3242 if (old_start_lambda >= 0)
3246 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3249 "lambda vector components in %s don't match those previously read",
3255 lambda_components_add(&(sd->lc), "", 0);
3257 if (!start_lambda.lc)
3259 lambda_vec_init(&start_lambda, &(sd->lc));
3261 start_lambda.val[0] = old_start_lambda;
3265 /* read lambda vector */
3267 gmx_bool check = (sd->lc.N > 0);
3268 if (fr->block[i].nsub < 2)
3271 "No lambda vector, but start_lambda=%f\n",
3274 n_lambda_vec = fr->block[i].sub[1].ival[1];
3275 for (j = 0; j < n_lambda_vec; j++)
3278 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3281 /* check the components */
3282 lambda_components_check(&(sd->lc), j, name,
3287 lambda_components_add(&(sd->lc), name,
3291 lambda_vec_init(&start_lambda, &(sd->lc));
3292 start_lambda.index = fr->block[i].sub[1].ival[0];
3293 for (j = 0; j < n_lambda_vec; j++)
3295 start_lambda.val[j] = fr->block[i].sub[0].dval[5+j];
3300 first_t = start_time;
3307 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3309 if (nblocks_raw > 0 && nblocks_hist > 0)
3311 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3316 /* check the native lambda */
3317 if (!lambda_vec_same(&start_lambda, native_lambda) )
3319 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %f, and becomes %f at time %f",
3320 fn, native_lambda, start_lambda, start_time);
3322 /* check the number of samples against the previous number */
3323 if ( ((nblocks_raw+nblocks_hist) != nsamples) || (nlam != 1 ) )
3325 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3326 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3328 /* check whether last iterations's end time matches with
3329 the currrent start time */
3330 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t >= 0)
3332 /* it didn't. We need to store our samples and reallocate */
3333 for (i = 0; i < nsamples; i++)
3335 if (samples_rawdh[i])
3337 /* insert it into the existing list */
3338 lambda_data_list_insert_sample(sd->lb,
3340 /* and make sure we'll allocate a new one this time
3342 samples_rawdh[i] = NULL;
3349 /* this is the first round; allocate the associated data
3351 /*native_lambda=start_lambda;*/
3352 lambda_vec_init(native_lambda, &(sd->lc));
3353 lambda_vec_copy(native_lambda, &start_lambda);
3354 nsamples = nblocks_raw+nblocks_hist;
3355 snew(nhists, nsamples);
3356 snew(npts, nsamples);
3357 snew(lambdas, nsamples);
3358 snew(samples_rawdh, nsamples);
3359 for (i = 0; i < nsamples; i++)
3364 samples_rawdh[i] = NULL; /* init to NULL so we know which
3365 ones contain values */
3370 k = 0; /* counter for the lambdas, etc. arrays */
3371 for (i = 0; i < fr->nblock; i++)
3373 if (fr->block[i].id == enxDH)
3375 int type = (fr->block[i].sub[0].ival[0]);
3376 if (type == dhbtDH || type == dhbtDHDL)
3379 read_edr_rawdh_block(&(samples_rawdh[k]),
3382 start_time, delta_time,
3383 native_lambda, rtemp,
3386 if (samples_rawdh[k])
3388 lambdas[k] = samples_rawdh[k]->foreign_lambda;
3393 else if (fr->block[i].id == enxDHHIST)
3395 int type = (int)(fr->block[i].sub[1].lval[1]);
3396 if (type == dhbtDH || type == dhbtDHDL)
3400 samples_t *s; /* this is where the data will go */
3401 s = read_edr_hist_block(&nb, &(fr->block[i]),
3402 start_time, delta_time,
3403 native_lambda, rtemp,
3408 lambdas[k] = s->foreign_lambda;
3411 /* and insert the new sample immediately */
3412 for (j = 0; j < nb; j++)
3414 lambda_data_list_insert_sample(sd->lb, s+j);
3420 /* Now store all our extant sample collections */
3421 for (i = 0; i < nsamples; i++)
3423 if (samples_rawdh[i])
3425 /* insert it into the existing list */
3426 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3434 lambda_vec_print(native_lambda, buf, FALSE);
3435 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3436 fn, first_t, last_t, buf);
3437 for (i = 0; i < nsamples; i++)
3441 lambda_vec_print(lambdas[i], buf, TRUE);
3444 printf(" %s (%d hists)\n", buf, nhists[i]);
3448 printf(" %s (%d pts)\n", buf, npts[i]);
3460 int gmx_bar(int argc, char *argv[])
3462 static const char *desc[] = {
3463 "[TT]g_bar[tt] calculates free energy difference estimates through ",
3464 "Bennett's acceptance ratio method (BAR). It also automatically",
3465 "adds series of individual free energies obtained with BAR into",
3466 "a combined free energy estimate.[PAR]",
3468 "Every individual BAR free energy difference relies on two ",
3469 "simulations at different states: say state A and state B, as",
3470 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3471 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3472 "average of the Hamiltonian difference of state B given state A and",
3474 "The energy differences to the other state must be calculated",
3475 "explicitly during the simulation. This can be done with",
3476 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3478 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3479 "Two types of input files are supported:[BR]",
3480 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
3481 "The files should have columns ",
3482 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3483 "The [GRK]lambda[grk] values are inferred ",
3484 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3485 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3486 "legends of Delta H",
3488 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
3489 "[TT]-extp[tt] option for these files, it is assumed",
3490 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3491 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3492 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3493 "subtitle (if present), otherwise from a number in the subdirectory ",
3494 "in the file name.[PAR]",
3496 "The [GRK]lambda[grk] of the simulation is parsed from ",
3497 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3498 "foreign [GRK]lambda[grk] values from the legend containing the ",
3499 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3500 "the legend line containing 'T ='.[PAR]",
3502 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3503 "These can contain either lists of energy differences (see the ",
3504 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3505 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3506 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3507 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3509 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3510 "the energy difference can also be extrapolated from the ",
3511 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3512 "option, which assumes that the system's Hamiltonian depends linearly",
3513 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3515 "The free energy estimates are determined using BAR with bisection, ",
3516 "with the precision of the output set with [TT]-prec[tt]. ",
3517 "An error estimate taking into account time correlations ",
3518 "is made by splitting the data into blocks and determining ",
3519 "the free energy differences over those blocks and assuming ",
3520 "the blocks are independent. ",
3521 "The final error estimate is determined from the average variance ",
3522 "over 5 blocks. A range of block numbers for error estimation can ",
3523 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3525 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3526 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3527 "samples. [BB]Note[bb] that when aggregating energy ",
3528 "differences/derivatives with different sampling intervals, this is ",
3529 "almost certainly not correct. Usually subsequent energies are ",
3530 "correlated and different time intervals mean different degrees ",
3531 "of correlation between samples.[PAR]",
3533 "The results are split in two parts: the last part contains the final ",
3534 "results in kJ/mol, together with the error estimate for each part ",
3535 "and the total. The first part contains detailed free energy ",
3536 "difference estimates and phase space overlap measures in units of ",
3537 "kT (together with their computed error estimate). The printed ",
3539 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3540 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3541 "[TT]*[tt] DG: the free energy estimate.[BR]",
3542 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
3543 "[TT]*[tt] s_B: an estimate of the relative entropy of A in B.[BR]",
3544 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
3546 "The relative entropy of both states in each other's ensemble can be ",
3547 "interpreted as a measure of phase space overlap: ",
3548 "the relative entropy s_A of the work samples of lambda_B in the ",
3549 "ensemble of lambda_A (and vice versa for s_B), is a ",
3550 "measure of the 'distance' between Boltzmann distributions of ",
3551 "the two states, that goes to zero for identical distributions. See ",
3552 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3554 "The estimate of the expected per-sample standard deviation, as given ",
3555 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3556 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3557 "of the actual statistical error, because it assumes independent samples).[PAR]",
3559 "To get a visual estimate of the phase space overlap, use the ",
3560 "[TT]-oh[tt] option to write series of histograms, together with the ",
3561 "[TT]-nbin[tt] option.[PAR]"
3563 static real begin = 0, end = -1, temp = -1;
3564 int nd = 2, nbmin = 5, nbmax = 5;
3566 gmx_bool use_dhdl = FALSE;
3567 gmx_bool calc_s, calc_v;
3569 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3570 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3571 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3572 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3573 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3574 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3575 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3576 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3580 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3581 { efEDR, "-g", "ener", ffOPTRDMULT },
3582 { efXVG, "-o", "bar", ffOPTWR },
3583 { efXVG, "-oi", "barint", ffOPTWR },
3584 { efXVG, "-oh", "histogram", ffOPTWR }
3586 #define NFILE asize(fnm)
3589 int nf = 0; /* file counter */
3591 int nfile_tot; /* total number of input files */
3596 sim_data_t sim_data; /* the simulation data */
3597 barres_t *results; /* the results */
3598 int nresults; /* number of results in results array */
3601 double prec, dg_tot, dg, sig, dg_tot_max, dg_tot_min;
3603 char dgformat[20], xvg2format[STRLEN], xvg3format[STRLEN];
3604 char buf[STRLEN], buf2[STRLEN];
3605 char ktformat[STRLEN], sktformat[STRLEN];
3606 char kteformat[STRLEN], skteformat[STRLEN];
3609 gmx_bool result_OK = TRUE, bEE = TRUE;
3611 gmx_bool disc_err = FALSE;
3612 double sum_disc_err = 0.; /* discretization error */
3613 gmx_bool histrange_err = FALSE;
3614 double sum_histrange_err = 0.; /* histogram range error */
3615 double stat_err = 0.; /* statistical error */
3617 parse_common_args(&argc, argv,
3619 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL, &oenv);
3621 if (opt2bSet("-f", NFILE, fnm))
3623 nxvgfile = opt2fns(&fxvgnms, "-f", NFILE, fnm);
3625 if (opt2bSet("-g", NFILE, fnm))
3627 nedrfile = opt2fns(&fedrnms, "-g", NFILE, fnm);
3630 sim_data_init(&sim_data);
3632 /* make linked list */
3634 lambda_data_init(lb, 0, 0);
3640 nfile_tot = nxvgfile + nedrfile;
3644 gmx_fatal(FARGS, "No input files!");
3649 gmx_fatal(FARGS, "Can not have negative number of digits");
3651 prec = pow(10, -nd);
3653 snew(partsum, (nbmax+1)*(nbmax+1));
3656 /* read in all files. First xvg files */
3657 for (f = 0; f < nxvgfile; f++)
3659 read_bar_xvg(fxvgnms[f], &temp, &sim_data);
3662 /* then .edr files */
3663 for (f = 0; f < nedrfile; f++)
3665 read_barsim_edr(fedrnms[f], &temp, &sim_data);;
3669 /* fix the times to allow for equilibration */
3670 sim_data_impose_times(&sim_data, begin, end);
3672 if (opt2bSet("-oh", NFILE, fnm))
3674 sim_data_histogram(&sim_data, opt2fn("-oh", NFILE, fnm), nbin, oenv);
3677 /* assemble the output structures from the lambdas */
3678 results = barres_list_create(&sim_data, &nresults, use_dhdl);
3680 sum_disc_err = barres_list_max_disc_err(results, nresults);
3684 printf("\nNo results to calculate.\n");
3688 if (sum_disc_err > prec)
3690 prec = sum_disc_err;
3691 nd = ceil(-log10(prec));
3692 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3696 /*sprintf(lamformat,"%%6.3f");*/
3697 sprintf( dgformat, "%%%d.%df", 3+nd, nd);
3698 /* the format strings of the results in kT */
3699 sprintf( ktformat, "%%%d.%df", 5+nd, nd);
3700 sprintf( sktformat, "%%%ds", 6+nd);
3701 /* the format strings of the errors in kT */
3702 sprintf( kteformat, "%%%d.%df", 3+nd, nd);
3703 sprintf( skteformat, "%%%ds", 4+nd);
3704 sprintf(xvg2format, "%s %s\n", "%s", dgformat);
3705 sprintf(xvg3format, "%s %s %s\n", "%s", dgformat, dgformat);
3710 if (opt2bSet("-o", NFILE, fnm))
3712 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3713 fpb = xvgropen_type(opt2fn("-o", NFILE, fnm), "Free energy differences",
3714 "\\lambda", buf, exvggtXYDY, oenv);
3718 if (opt2bSet("-oi", NFILE, fnm))
3720 sprintf(buf, "%s (%s)", "\\DeltaG", "kT");
3721 fpi = xvgropen(opt2fn("-oi", NFILE, fnm), "Free energy integral",
3722 "\\lambda", buf, oenv);
3732 /* first calculate results */
3735 for (f = 0; f < nresults; f++)
3737 /* Determine the free energy difference with a factor of 10
3738 * more accuracy than requested for printing.
3740 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3743 if (results[f].dg_disc_err > prec/10.)
3747 if (results[f].dg_histrange_err > prec/10.)
3749 histrange_err = TRUE;
3753 /* print results in kT */
3757 printf("\nTemperature: %g K\n", temp);
3759 printf("\nDetailed results in kT (see help for explanation):\n\n");
3760 printf("%6s ", " lam_A");
3761 printf("%6s ", " lam_B");
3762 printf(sktformat, "DG ");
3765 printf(skteformat, "+/- ");
3769 printf(skteformat, "disc ");
3773 printf(skteformat, "range ");
3775 printf(sktformat, "s_A ");
3778 printf(skteformat, "+/- " );
3780 printf(sktformat, "s_B ");
3783 printf(skteformat, "+/- " );
3785 printf(sktformat, "stdev ");
3788 printf(skteformat, "+/- ");
3791 for (f = 0; f < nresults; f++)
3793 lambda_vec_print_short(results[f].a->native_lambda, buf);
3795 lambda_vec_print_short(results[f].b->native_lambda, buf);
3797 printf(ktformat, results[f].dg);
3801 printf(kteformat, results[f].dg_err);
3806 printf(kteformat, results[f].dg_disc_err);
3811 printf(kteformat, results[f].dg_histrange_err);
3814 printf(ktformat, results[f].sa);
3818 printf(kteformat, results[f].sa_err);
3821 printf(ktformat, results[f].sb);
3825 printf(kteformat, results[f].sb_err);
3828 printf(ktformat, results[f].dg_stddev);
3832 printf(kteformat, results[f].dg_stddev_err);
3836 /* Check for negative relative entropy with a 95% certainty. */
3837 if (results[f].sa < -2*results[f].sa_err ||
3838 results[f].sb < -2*results[f].sb_err)
3846 printf("\nWARNING: Some of these results violate the Second Law of "
3847 "Thermodynamics: \n"
3848 " This is can be the result of severe undersampling, or "
3850 " there is something wrong with the simulations.\n");
3854 /* final results in kJ/mol */
3855 printf("\n\nFinal results in kJ/mol:\n\n");
3857 for (f = 0; f < nresults; f++)
3862 lambda_vec_print_short(results[f].a->native_lambda, buf);
3863 fprintf(fpi, xvg2format, buf, dg_tot);
3869 lambda_vec_print_intermediate(results[f].a->native_lambda,
3870 results[f].b->native_lambda,
3873 fprintf(fpb, xvg3format, buf, results[f].dg, results[f].dg_err);
3877 lambda_vec_print_short(results[f].a->native_lambda, buf);
3878 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3879 printf("%s - %s", buf, buf2);
3882 printf(dgformat, results[f].dg*kT);
3886 printf(dgformat, results[f].dg_err*kT);
3890 printf(" (max. range err. = ");
3891 printf(dgformat, results[f].dg_histrange_err*kT);
3893 sum_histrange_err += results[f].dg_histrange_err*kT;
3897 dg_tot += results[f].dg;
3901 lambda_vec_print_short(results[0].a->native_lambda, buf);
3902 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3903 printf("%s - %s", buf, buf2);
3906 printf(dgformat, dg_tot*kT);
3909 stat_err = bar_err(nbmin, nbmax, partsum)*kT;
3911 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3916 printf("\nmaximum discretization error = ");
3917 printf(dgformat, sum_disc_err);
3918 if (bEE && stat_err < sum_disc_err)
3920 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3925 printf("\nmaximum histogram range error = ");
3926 printf(dgformat, sum_histrange_err);
3927 if (bEE && stat_err < sum_histrange_err)
3929 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3938 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3939 fprintf(fpi, xvg2format, buf, dg_tot);
3943 do_view(oenv, opt2fn_null("-o", NFILE, fnm), "-xydy");
3944 do_view(oenv, opt2fn_null("-oi", NFILE, fnm), "-xydy");