1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
55 #include "gmx_fatal.h"
64 /* Structure for the names of lambda vector components */
65 typedef struct lambda_components_t
67 char **names; /* Array of strings with names for the lambda vector
69 int N; /* The number of components */
70 int Nalloc; /* The number of allocated components */
71 } lambda_components_t;
73 /* Structure for a lambda vector or a dhdl derivative direction */
74 typedef struct lambda_vec_t
76 double *val; /* The lambda vector component values. Only valid if
78 int dhdl; /* The coordinate index for the derivative described by this
80 const lambda_components_t *lc; /* the associated lambda_components
82 int index; /* The state number (init-lambda-state) of this lambda
83 vector, if known. If not, it is set to -1 */
86 /* the dhdl.xvg data from a simulation */
90 int ftp; /* file type */
91 int nset; /* number of lambdas, including dhdl */
92 int *np; /* number of data points (du or hists) per lambda */
93 int np_alloc; /* number of points (du or hists) allocated */
94 double temp; /* temperature */
95 lambda_vec_t *lambda; /* the lambdas (of first index for y). */
96 double *t; /* the times (of second index for y) */
97 double **y; /* the dU values. y[0] holds the derivative, while
98 further ones contain the energy differences between
99 the native lambda and the 'foreign' lambdas. */
100 lambda_vec_t native_lambda; /* the native lambda */
102 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
106 typedef struct hist_t
108 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
109 double dx[2]; /* the histogram spacing. The reverse
110 dx is the negative of the forward dx.*/
111 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
114 int nbin[2]; /* the (forward+reverse) number of bins */
115 gmx_large_int_t sum; /* the total number of counts. Must be
116 the same for forward + reverse. */
117 int nhist; /* number of hist datas (forward or reverse) */
119 double start_time, delta_time; /* start time, end time of histogram */
123 /* an aggregate of samples for partial free energy calculation */
124 typedef struct samples_t
126 lambda_vec_t *native_lambda; /* pointer to native lambda vector */
127 lambda_vec_t *foreign_lambda; /* pointer to foreign lambda vector */
128 double temp; /* the temperature */
129 gmx_bool derivative; /* whether this sample is a derivative */
131 /* The samples come either as either delta U lists: */
132 int ndu; /* the number of delta U samples */
133 double *du; /* the delta u's */
134 double *t; /* the times associated with those samples, or: */
135 double start_time,delta_time;/*start time and delta time for linear time*/
137 /* or as histograms: */
138 hist_t *hist; /* a histogram */
140 /* allocation data: (not NULL for data 'owned' by this struct) */
141 double *du_alloc, *t_alloc; /* allocated delta u arrays */
142 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
143 hist_t *hist_alloc; /* allocated hist */
145 gmx_large_int_t ntot; /* total number of samples */
146 const char *filename; /* the file name this sample comes from */
149 /* a sample range (start to end for du-style data, or boolean
150 for both du-style data and histograms */
151 typedef struct sample_range_t
153 int start, end; /* start and end index for du style data */
154 gmx_bool use; /* whether to use this sample */
156 samples_t *s; /* the samples this range belongs to */
160 /* a collection of samples for a partial free energy calculation
161 (i.e. the collection of samples from one native lambda to one
163 typedef struct sample_coll_t
165 lambda_vec_t *native_lambda; /* these should be the same for all samples
167 lambda_vec_t *foreign_lambda; /* collection */
168 double temp; /* the temperature */
170 int nsamples; /* the number of samples */
171 samples_t **s; /* the samples themselves */
172 sample_range_t *r; /* the sample ranges */
173 int nsamples_alloc; /* number of allocated samples */
175 gmx_large_int_t ntot; /* total number of samples in the ranges of
178 struct sample_coll_t *next, *prev; /* next and previous in the list */
181 /* all the samples associated with a lambda point */
182 typedef struct lambda_data_t
184 lambda_vec_t *lambda; /* the native lambda (at start time if dynamic) */
185 double temp; /* temperature */
187 sample_coll_t *sc; /* the samples */
189 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
191 struct lambda_data_t *next, *prev; /* the next and prev in the list */
194 /* Top-level data structure of simulation data */
195 typedef struct sim_data_t
197 lambda_data_t *lb; /* a lambda data linked list */
198 lambda_data_t lb_head; /* The head element of the linked list */
200 lambda_components_t lc; /* the allowed components of the lambda
204 /* Top-level data structure with calculated values. */
206 sample_coll_t *a, *b; /* the simulation data */
208 double dg; /* the free energy difference */
209 double dg_err; /* the free energy difference */
211 double dg_disc_err; /* discretization error */
212 double dg_histrange_err; /* histogram range error */
214 double sa; /* relative entropy of b in state a */
215 double sa_err; /* error in sa */
216 double sb; /* relative entropy of a in state b */
217 double sb_err; /* error in sb */
219 double dg_stddev; /* expected dg stddev per sample */
220 double dg_stddev_err; /* error in dg_stddev */
224 /* Initialize a lambda_components structure */
225 static void lambda_components_init(lambda_components_t *lc)
229 snew(lc->names, lc->Nalloc);
232 /* Add a component to a lambda_components structure */
233 static void lambda_components_add(lambda_components_t *lc,
234 const char *name, size_t name_length)
236 while (lc->N + 1 > lc->Nalloc)
238 lc->Nalloc = (lc->Nalloc == 0) ? 2 : 2*lc->Nalloc;
239 srealloc( lc->names, lc->Nalloc );
241 snew(lc->names[lc->N], name_length+1);
242 strncpy(lc->names[lc->N], name, name_length);
246 /* check whether a component with index 'index' matches the given name, or
247 is also NULL. Returns TRUE if this is the case.
248 the string name does not need to end */
249 static gmx_bool lambda_components_check(const lambda_components_t *lc,
257 if (name == NULL && lc->names[index] == NULL)
259 if ((name == NULL) != (lc->names[index] == NULL))
261 len = strlen(lc->names[index]);
262 if (len != name_length)
264 if (strncmp(lc->names[index], name, name_length)== 0)
269 /* Find the index of a given lambda component name, or -1 if not found */
270 static int lambda_components_find(const lambda_components_t *lc,
278 if (strncmp(lc->names[i], name, name_length) == 0)
286 /* initialize a lambda vector */
287 static void lambda_vec_init(lambda_vec_t *lv, const lambda_components_t *lc)
289 snew(lv->val, lc->N);
295 static void lambda_vec_destroy(lambda_vec_t *lv)
300 static void lambda_vec_copy(lambda_vec_t *lv, const lambda_vec_t *orig)
304 lambda_vec_init(lv, orig->lc);
306 lv->index=orig->index;
307 for(i=0;i<lv->lc->N;i++)
309 lv->val[i] = orig->val[i];
313 /* write a lambda vec to a preallocated string */
314 static void lambda_vec_print(const lambda_vec_t *lv, char *str, gmx_bool named)
319 str[0]=0; /* reset the string */
324 str += sprintf(str, "delta H to ");
328 str += sprintf(str, "(");
330 for(i=0;i<lv->lc->N;i++)
332 str += sprintf(str, "%g", lv->val[i]);
335 str += sprintf(str, ", ");
340 str += sprintf(str, ")");
345 /* this lambda vector describes a derivative */
346 str += sprintf(str, "dH/dl");
347 if (strlen(lv->lc->names[lv->dhdl]) > 0)
349 str += sprintf(str, " (%s)", lv->lc->names[lv->dhdl]);
354 /* write a shortened version of the lambda vec to a preallocated string */
355 static void lambda_vec_print_short(const lambda_vec_t *lv, char *str)
362 sprintf(str, "%6d", lv->index);
368 sprintf(str, "%6.3f", lv->val[0]);
372 sprintf(str, "dH/dl[%d]", lv->dhdl);
377 /* write an intermediate version of two lambda vecs to a preallocated string */
378 static void lambda_vec_print_intermediate(const lambda_vec_t *a,
379 const lambda_vec_t *b, char *str)
385 if ( (a->index >= 0) && (b->index>=0) )
387 sprintf(str, "%6.3f", ((double)a->index+(double)b->index)/2.);
391 if ( (a->dhdl < 0) && (b->dhdl < 0) )
393 sprintf(str, "%6.3f", (a->val[0]+b->val[0])/2.);
400 /* calculate the difference in lambda vectors: c = a-b.
401 c must be initialized already, and a and b must describe non-derivative
403 static void lambda_vec_diff(const lambda_vec_t *a, const lambda_vec_t *b,
408 if ( (a->dhdl > 0) || (b->dhdl > 0) )
411 "Trying to calculate the difference between derivatives instead of lambda points");
413 if ((a->lc != b->lc) || (a->lc != c->lc) )
416 "Trying to calculate the difference lambdas with differing basis set");
418 for(i=0;i<a->lc->N;i++)
420 c->val[i] = a->val[i] - b->val[i];
424 /* calculate and return the absolute difference in lambda vectors: c = |a-b|.
425 a and b must describe non-derivative lambda points */
426 static double lambda_vec_abs_diff(const lambda_vec_t *a, const lambda_vec_t *b)
431 if ( (a->dhdl > 0) || (b->dhdl > 0) )
434 "Trying to calculate the difference between derivatives instead of lambda points");
439 "Trying to calculate the difference lambdas with differing basis set");
441 for(i=0;i<a->lc->N;i++)
443 double df=a->val[i] - b->val[i];
450 /* check whether two lambda vectors are the same */
451 static gmx_bool lambda_vec_same(const lambda_vec_t *a, const lambda_vec_t *b)
459 for(i=0;i<a->lc->N;i++)
461 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
470 /* they're derivatives, so we check whether the indices match */
471 return (a->dhdl == b->dhdl);
475 /* Compare the sort order of two foreign lambda vectors
477 returns 1 if a is 'bigger' than b,
478 returns 0 if they're the same,
479 returns -1 if a is 'smaller' than b.*/
480 static gmx_bool lambda_vec_cmp_foreign(const lambda_vec_t *a,
481 const lambda_vec_t *b)
484 double norm_a=0, norm_b=0;
485 gmx_bool different=FALSE;
489 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
491 /* if either one has an index we sort based on that */
492 if ((a->index >= 0) || (b->index >= 0))
494 if (a->index == b->index)
496 return (a->index > b->index) ? 1 : -1;
498 if (a->dhdl >= 0 || b->dhdl >= 0)
500 /* lambda vectors that are derivatives always sort higher than those
501 without derivatives */
502 if ((a->dhdl >= 0) != (b->dhdl >= 0) )
504 return (a->dhdl >= 0) ? 1 : -1 ;
506 return a->dhdl > b->dhdl;
509 /* neither has an index, so we can only sort on the lambda components,
510 which is only valid if there is one component */
511 for(i=0;i<a->lc->N;i++)
513 if (!gmx_within_tol(a->val[i], b->val[i], 10*GMX_REAL_EPS))
517 norm_a += a->val[i]*a->val[i];
518 norm_b += b->val[i]*b->val[i];
524 return norm_a > norm_b;
527 /* Compare the sort order of two native lambda vectors
529 returns 1 if a is 'bigger' than b,
530 returns 0 if they're the same,
531 returns -1 if a is 'smaller' than b.*/
532 static gmx_bool lambda_vec_cmp_native(const lambda_vec_t *a,
533 const lambda_vec_t *b)
539 gmx_fatal(FARGS, "Can't compare lambdas with differing basis sets");
541 /* if either one has an index we sort based on that */
542 if ((a->index >= 0) || (b->index >= 0))
544 if (a->index == b->index)
546 return (a->index > b->index) ? 1 : -1;
548 /* neither has an index, so we can only sort on the lambda components,
549 which is only valid if there is one component */
553 "Can't compare lambdas with no index and > 1 component");
555 if (a->dhdl >= 0 || b->dhdl >= 0)
558 "Can't compare native lambdas that are derivatives");
560 if (gmx_within_tol(a->val[0], b->val[0], 10*GMX_REAL_EPS))
564 return a->val[0] > b->val[0] ? 1 : -1;
570 static void hist_init(hist_t *h, int nhist, int *nbin)
575 gmx_fatal(FARGS, "histogram with more than two sets of data!");
579 snew(h->bin[i], nbin[i]);
582 h->start_time=h->delta_time=0;
589 static void hist_destroy(hist_t *h)
595 static void xvg_init(xvg_t *ba)
604 static void samples_init(samples_t *s, lambda_vec_t *native_lambda,
605 lambda_vec_t *foreign_lambda, double temp,
606 gmx_bool derivative, const char *filename)
608 s->native_lambda=native_lambda;
609 s->foreign_lambda=foreign_lambda;
611 s->derivative=derivative;
616 s->start_time = s->delta_time = 0;
625 s->filename=filename;
628 static void sample_range_init(sample_range_t *r, samples_t *s)
636 static void sample_coll_init(sample_coll_t *sc, lambda_vec_t *native_lambda,
637 lambda_vec_t *foreign_lambda, double temp)
639 sc->native_lambda = native_lambda;
640 sc->foreign_lambda = foreign_lambda;
646 sc->nsamples_alloc=0;
649 sc->next=sc->prev=NULL;
652 static void sample_coll_destroy(sample_coll_t *sc)
654 /* don't free the samples themselves */
660 static void lambda_data_init(lambda_data_t *l, lambda_vec_t *native_lambda,
663 l->lambda=native_lambda;
671 sample_coll_init(l->sc, native_lambda, NULL, 0.);
676 static void barres_init(barres_t *br)
692 /* calculate the total number of samples in a sample collection */
693 static void sample_coll_calc_ntot(sample_coll_t *sc)
698 for(i=0;i<sc->nsamples;i++)
704 sc->ntot += sc->s[i]->ntot;
708 sc->ntot += sc->r[i].end - sc->r[i].start;
715 /* find the barsamples_t associated with a lambda that corresponds to
716 a specific foreign lambda */
717 static sample_coll_t *lambda_data_find_sample_coll(lambda_data_t *l,
718 lambda_vec_t *foreign_lambda)
720 sample_coll_t *sc=l->sc->next;
724 if (lambda_vec_same(sc->foreign_lambda,foreign_lambda))
734 /* insert li into an ordered list of lambda_colls */
735 static void lambda_data_insert_sample_coll(lambda_data_t *l, sample_coll_t *sc)
737 sample_coll_t *scn=l->sc->next;
738 while ( (scn!=l->sc) )
740 if (lambda_vec_cmp_foreign(scn->foreign_lambda, sc->foreign_lambda) > 0)
744 /* now insert it before the found scn */
751 /* insert li into an ordered list of lambdas */
752 static void lambda_data_insert_lambda(lambda_data_t *head, lambda_data_t *li)
754 lambda_data_t *lc=head->next;
757 if (lambda_vec_cmp_native(lc->lambda, li->lambda) > 0)
761 /* now insert ourselves before the found lc */
768 /* insert a sample and a sample_range into a sample_coll. The
769 samples are stored as a pointer, the range is copied. */
770 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
773 /* first check if it belongs here */
774 if (sc->temp != s->temp)
776 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
777 s->filename, sc->next->s[0]->filename);
779 if (!lambda_vec_same(sc->native_lambda, s->native_lambda))
781 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
782 s->filename, sc->next->s[0]->filename);
784 if (!lambda_vec_same(sc->foreign_lambda,s->foreign_lambda))
786 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
787 s->filename, sc->next->s[0]->filename);
790 /* check if there's room */
791 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
793 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
794 srenew(sc->s, sc->nsamples_alloc);
795 srenew(sc->r, sc->nsamples_alloc);
797 sc->s[sc->nsamples]=s;
798 sc->r[sc->nsamples]=*r;
801 sample_coll_calc_ntot(sc);
804 /* insert a sample into a lambda_list, creating the right sample_coll if
806 static void lambda_data_list_insert_sample(lambda_data_t *head, samples_t *s)
808 gmx_bool found=FALSE;
812 lambda_data_t *l=head->next;
814 /* first search for the right lambda_data_t */
817 if (lambda_vec_same(l->lambda, s->native_lambda) )
827 snew(l, 1); /* allocate a new one */
828 lambda_data_init(l, s->native_lambda, s->temp); /* initialize it */
829 lambda_data_insert_lambda(head, l); /* add it to the list */
832 /* now look for a sample collection */
833 sc=lambda_data_find_sample_coll(l, s->foreign_lambda);
836 snew(sc, 1); /* allocate a new one */
837 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
838 lambda_data_insert_sample_coll(l, sc);
841 /* now insert the samples into the sample coll */
842 sample_range_init(&r, s);
843 sample_coll_insert_sample(sc, s, &r);
847 /* make a histogram out of a sample collection */
848 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
849 int *nbin_alloc, int *nbin,
850 double *dx, double *xmin, int nbin_default)
853 gmx_bool dx_set=FALSE;
854 gmx_bool xmin_set=FALSE;
856 gmx_bool xmax_set=FALSE;
857 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
858 limits of a histogram */
861 /* first determine dx and xmin; try the histograms */
862 for(i=0;i<sc->nsamples;i++)
866 hist_t *hist=sc->s[i]->hist;
867 for(k=0;k<hist->nhist;k++)
869 double hdx=hist->dx[k];
870 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
872 /* we use the biggest dx*/
873 if ( (!dx_set) || hist->dx[0] > *dx)
878 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
881 *xmin = (hist->x0[k]*hdx);
884 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
888 if (hist->bin[k][hist->nbin[k]-1] != 0)
891 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
899 /* and the delta us */
900 for(i=0;i<sc->nsamples;i++)
902 if (sc->s[i]->ndu > 0)
904 /* determine min and max */
905 int starti=sc->r[i].start;
906 int endi=sc->r[i].end;
907 double du_xmin=sc->s[i]->du[starti];
908 double du_xmax=sc->s[i]->du[starti];
909 for(j=starti+1;j<endi;j++)
911 if (sc->s[i]->du[j] < du_xmin)
912 du_xmin = sc->s[i]->du[j];
913 if (sc->s[i]->du[j] > du_xmax)
914 du_xmax = sc->s[i]->du[j];
917 /* and now change the limits */
918 if ( (!xmin_set) || (du_xmin < *xmin) )
923 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
931 if (!xmax_set || !xmin_set)
941 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
942 be 0, and we count from 0 */
946 *nbin=(xmax-(*xmin))/(*dx);
949 if (*nbin > *nbin_alloc)
952 srenew(*bin, *nbin_alloc);
955 /* reset the histogram */
956 for(i=0;i<(*nbin);i++)
961 /* now add the actual data */
962 for(i=0;i<sc->nsamples;i++)
966 hist_t *hist=sc->s[i]->hist;
967 for(k=0;k<hist->nhist;k++)
969 double hdx = hist->dx[k];
970 double xmin_hist=hist->x0[k]*hdx;
971 for(j=0;j<hist->nbin[k];j++)
973 /* calculate the bin corresponding to the middle of the
975 double x=hdx*(j+0.5) + xmin_hist;
976 int binnr=(int)((x-(*xmin))/(*dx));
978 if (binnr >= *nbin || binnr<0)
981 (*bin)[binnr] += hist->bin[k][j];
987 int starti=sc->r[i].start;
988 int endi=sc->r[i].end;
989 for(j=starti;j<endi;j++)
991 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
992 if (binnr >= *nbin || binnr<0)
1001 /* write a collection of histograms to a file */
1002 void sim_data_histogram(sim_data_t *sd, const char *filename,
1003 int nbin_default, const output_env_t oenv)
1005 char label_x[STRLEN];
1006 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
1007 const char *title="N(\\DeltaH)";
1008 const char *label_y="Samples";
1012 char **setnames=NULL;
1013 gmx_bool first_set=FALSE;
1014 /* histogram data: */
1021 lambda_data_t *bl_head = sd->lb;
1023 printf("\nWriting histogram to %s\n", filename);
1024 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
1026 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
1028 /* first get all the set names */
1030 /* iterate over all lambdas */
1033 sample_coll_t *sc=bl->sc->next;
1035 /* iterate over all samples */
1038 char buf[STRLEN], buf2[STRLEN];
1041 srenew(setnames, nsets);
1042 snew(setnames[nsets-1], STRLEN);
1043 if (sc->foreign_lambda->dhdl < 0)
1045 lambda_vec_print(sc->native_lambda, buf, FALSE);
1046 lambda_vec_print(sc->foreign_lambda, buf2, FALSE);
1047 sprintf(setnames[nsets-1], "N(%s(%s=%s) | %s=%s)",
1048 deltag, lambda, buf2, lambda, buf);
1052 lambda_vec_print(sc->native_lambda, buf, FALSE);
1053 sprintf(setnames[nsets-1], "N(%s | %s=%s)",
1061 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
1064 /* now make the histograms */
1066 /* iterate over all lambdas */
1069 sample_coll_t *sc=bl->sc->next;
1071 /* iterate over all samples */
1076 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
1079 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
1084 double xmin=i*dx + min;
1085 double xmax=(i+1)*dx + min;
1087 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
1103 /* create a collection (array) of barres_t object given a ordered linked list
1104 of barlamda_t sample collections */
1105 static barres_t *barres_list_create(sim_data_t *sd, int *nres,
1112 gmx_bool dhdl=FALSE;
1113 gmx_bool first=TRUE;
1114 lambda_data_t *bl_head=sd->lb;
1116 /* first count the lambdas */
1123 snew(res, nlambda-1);
1125 /* next put the right samples in the res */
1127 bl=bl_head->next->next; /* we start with the second one. */
1130 sample_coll_t *sc, *scprev;
1131 barres_t *br=&(res[*nres]);
1132 /* there is always a previous one. we search for that as a foreign
1134 scprev=lambda_data_find_sample_coll(bl->prev, bl->lambda);
1135 sc=lambda_data_find_sample_coll(bl, bl->prev->lambda);
1143 scprev=lambda_data_find_sample_coll(bl->prev, bl->prev->lambda);
1144 sc=lambda_data_find_sample_coll(bl, bl->lambda);
1148 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
1153 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
1156 else if (!scprev && !sc)
1158 gmx_fatal(FARGS,"There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
1161 /* normal delta H */
1164 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->lambda,bl->prev->lambda);
1168 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->prev->lambda,bl->lambda);
1180 /* estimate the maximum discretization error */
1181 static double barres_list_max_disc_err(barres_t *res, int nres)
1185 double delta_lambda;
1189 barres_t *br=&(res[i]);
1191 delta_lambda=lambda_vec_abs_diff(br->b->native_lambda,
1192 br->a->native_lambda);
1194 for(j=0;j<br->a->nsamples;j++)
1196 if (br->a->s[j]->hist)
1199 if (br->a->s[j]->derivative)
1200 Wfac = delta_lambda;
1202 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
1205 for(j=0;j<br->b->nsamples;j++)
1207 if (br->b->s[j]->hist)
1210 if (br->b->s[j]->derivative)
1211 Wfac = delta_lambda;
1212 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
1220 /* impose start and end times on a sample collection, updating sample_ranges */
1221 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
1225 for(i=0;i<sc->nsamples;i++)
1227 samples_t *s=sc->s[i];
1228 sample_range_t *r=&(sc->r[i]);
1231 double end_time=s->hist->delta_time*s->hist->sum +
1232 s->hist->start_time;
1233 if (s->hist->start_time < begin_t || end_time > end_t)
1243 if (s->start_time < begin_t)
1245 r->start=(int)((begin_t - s->start_time)/s->delta_time);
1247 end_time=s->delta_time*s->ndu + s->start_time;
1248 if (end_time > end_t)
1250 r->end=(int)((end_t - s->start_time)/s->delta_time);
1256 for(j=0;j<s->ndu;j++)
1258 if (s->t[j] <begin_t)
1263 if (s->t[j] >= end_t)
1270 if (r->start > r->end)
1276 sample_coll_calc_ntot(sc);
1279 static void sim_data_impose_times(sim_data_t *sd, double begin, double end)
1281 double first_t, last_t;
1282 double begin_t, end_t;
1284 lambda_data_t *head=sd->lb;
1287 if (begin<=0 && end<0)
1292 /* first determine the global start and end times */
1298 sample_coll_t *sc=lc->sc->next;
1301 for(j=0;j<sc->nsamples;j++)
1303 double start_t,end_t;
1305 start_t = sc->s[j]->start_time;
1306 end_t = sc->s[j]->start_time;
1309 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
1315 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
1319 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
1323 if (start_t < first_t || first_t<0)
1337 /* calculate the actual times */
1355 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
1357 if (begin_t > end_t)
1361 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
1363 /* then impose them */
1367 sample_coll_t *sc=lc->sc->next;
1370 sample_coll_impose_times(sc, begin_t, end_t);
1378 /* create subsample i out of ni from an existing sample_coll */
1379 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1380 sample_coll_t *sc_orig,
1384 int hist_start, hist_end;
1386 gmx_large_int_t ntot_start;
1387 gmx_large_int_t ntot_end;
1388 gmx_large_int_t ntot_so_far;
1390 *sc = *sc_orig; /* just copy all fields */
1392 /* allocate proprietary memory */
1393 snew(sc->s, sc_orig->nsamples);
1394 snew(sc->r, sc_orig->nsamples);
1396 /* copy the samples */
1397 for(j=0;j<sc_orig->nsamples;j++)
1399 sc->s[j] = sc_orig->s[j];
1400 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1403 /* now fix start and end fields */
1404 /* the casts avoid possible overflows */
1405 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1406 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1408 for(j=0;j<sc->nsamples;j++)
1410 gmx_large_int_t ntot_add;
1411 gmx_large_int_t new_start, new_end;
1417 ntot_add = sc->s[j]->hist->sum;
1421 ntot_add = sc->r[j].end - sc->r[j].start;
1429 if (!sc->s[j]->hist)
1431 if (ntot_so_far < ntot_start)
1433 /* adjust starting point */
1434 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1438 new_start = sc->r[j].start;
1440 /* adjust end point */
1441 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1442 if (new_end > sc->r[j].end)
1444 new_end=sc->r[j].end;
1447 /* check if we're in range at all */
1448 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1453 /* and write the new range */
1454 sc->r[j].start=(int)new_start;
1455 sc->r[j].end=(int)new_end;
1462 double ntot_start_norm, ntot_end_norm;
1463 /* calculate the amount of overlap of the
1464 desired range (ntot_start -- ntot_end) onto
1465 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1467 /* first calculate normalized bounds
1468 (where 0 is the start of the hist range, and 1 the end) */
1469 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1470 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1472 /* now fix the boundaries */
1473 ntot_start_norm = min(1, max(0., ntot_start_norm));
1474 ntot_end_norm = max(0, min(1., ntot_end_norm));
1476 /* and calculate the overlap */
1477 overlap = ntot_end_norm - ntot_start_norm;
1479 if (overlap > 0.95) /* we allow for 5% slack */
1481 sc->r[j].use = TRUE;
1483 else if (overlap < 0.05)
1485 sc->r[j].use = FALSE;
1493 ntot_so_far += ntot_add;
1495 sample_coll_calc_ntot(sc);
1500 /* calculate minimum and maximum work values in sample collection */
1501 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1502 double *Wmin, double *Wmax)
1509 for(i=0;i<sc->nsamples;i++)
1511 samples_t *s=sc->s[i];
1512 sample_range_t *r=&(sc->r[i]);
1517 for(j=r->start; j<r->end; j++)
1519 *Wmin = min(*Wmin,s->du[j]*Wfac);
1520 *Wmax = max(*Wmax,s->du[j]*Wfac);
1525 int hd=0; /* determine the histogram direction: */
1527 if ( (s->hist->nhist>1) && (Wfac<0) )
1533 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1535 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1536 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1537 /* look for the highest value bin with values */
1538 if (s->hist->bin[hd][j]>0)
1540 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1541 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1550 /* Initialize a sim_data structure */
1551 static void sim_data_init(sim_data_t *sd)
1553 /* make linked list */
1554 sd->lb = &(sd->lb_head);
1555 sd->lb->next = sd->lb;
1556 sd->lb->prev = sd->lb;
1558 lambda_components_init(&(sd->lc));
1562 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1571 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1577 /* calculate the BAR average given a histogram
1579 if type== 0, calculate the best estimate for the average,
1580 if type==-1, calculate the minimum possible value given the histogram
1581 if type== 1, calculate the maximum possible value given the histogram */
1582 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1588 /* normalization factor multiplied with bin width and
1589 number of samples (we normalize through M): */
1591 int hd=0; /* determine the histogram direction: */
1594 if ( (hist->nhist>1) && (Wfac<0) )
1599 max=hist->nbin[hd]-1;
1602 max=hist->nbin[hd]; /* we also add whatever was out of range */
1607 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1608 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1610 sum += pxdx/(1. + exp(x + sbMmDG));
1616 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1617 double temp, double tol, int type)
1622 double Wfac1,Wfac2,Wmin,Wmax;
1623 double DG0,DG1,DG2,dDG1;
1625 double n1, n2; /* numbers of samples as doubles */
1630 /* count the numbers of samples */
1636 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1637 if (ca->foreign_lambda->dhdl<0)
1639 /* this is the case when the delta U were calculated directly
1640 (i.e. we're not scaling dhdl) */
1646 /* we're using dhdl, so delta_lambda needs to be a
1647 multiplication factor. */
1648 /*double delta_lambda=cb->native_lambda-ca->native_lambda;*/
1649 double delta_lambda=lambda_vec_abs_diff(cb->native_lambda,
1651 if (cb->native_lambda->lc->N > 1)
1654 "Can't (yet) do multi-component dhdl interpolation");
1657 Wfac1 = beta*delta_lambda;
1658 Wfac2 = -beta*delta_lambda;
1663 /* We print the output both in kT and kJ/mol.
1664 * Here we determine DG in kT, so when beta < 1
1665 * the precision has to be increased.
1670 /* Calculate minimum and maximum work to give an initial estimate of
1671 * delta G as their average.
1674 double Wmin1, Wmin2, Wmax1, Wmax2;
1675 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1676 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1678 Wmin=min(Wmin1, Wmin2);
1679 Wmax=max(Wmax1, Wmax2);
1687 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1689 /* We approximate by bisection: given our initial estimates
1690 we keep checking whether the halfway point is greater or
1691 smaller than what we get out of the BAR averages.
1693 For the comparison we can use twice the tolerance. */
1694 while (DG2 - DG0 > 2*tol)
1696 DG1 = 0.5*(DG0 + DG2);
1698 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1701 /* calculate the BAR averages */
1704 for(i=0;i<ca->nsamples;i++)
1706 samples_t *s=ca->s[i];
1707 sample_range_t *r=&(ca->r[i]);
1712 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1716 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1721 for(i=0;i<cb->nsamples;i++)
1723 samples_t *s=cb->s[i];
1724 sample_range_t *r=&(cb->r[i]);
1729 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1733 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1749 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1753 return 0.5*(DG0 + DG2);
1756 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1757 double temp, double dg, double *sa, double *sb)
1763 double Wfac1, Wfac2;
1769 /* count the numbers of samples */
1773 /* to ensure the work values are the same as during the delta_G */
1774 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1775 if (ca->foreign_lambda->dhdl<0)
1777 /* this is the case when the delta U were calculated directly
1778 (i.e. we're not scaling dhdl) */
1784 /* we're using dhdl, so delta_lambda needs to be a
1785 multiplication factor. */
1786 double delta_lambda=lambda_vec_abs_diff(cb->native_lambda,
1788 Wfac1 = beta*delta_lambda;
1789 Wfac2 = -beta*delta_lambda;
1792 /* first calculate the average work in both directions */
1793 for(i=0;i<ca->nsamples;i++)
1795 samples_t *s=ca->s[i];
1796 sample_range_t *r=&(ca->r[i]);
1801 for(j=r->start;j<r->end;j++)
1802 W_ab += Wfac1*s->du[j];
1806 /* normalization factor multiplied with bin width and
1807 number of samples (we normalize through M): */
1810 int hd=0; /* histogram direction */
1811 if ( (s->hist->nhist>1) && (Wfac1<0) )
1817 for(j=0;j<s->hist->nbin[0];j++)
1819 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1820 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1828 for(i=0;i<cb->nsamples;i++)
1830 samples_t *s=cb->s[i];
1831 sample_range_t *r=&(cb->r[i]);
1836 for(j=r->start;j<r->end;j++)
1837 W_ba += Wfac1*s->du[j];
1841 /* normalization factor multiplied with bin width and
1842 number of samples (we normalize through M): */
1845 int hd=0; /* histogram direction */
1846 if ( (s->hist->nhist>1) && (Wfac2<0) )
1852 for(j=0;j<s->hist->nbin[0];j++)
1854 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1855 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1863 /* then calculate the relative entropies */
1868 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1869 double temp, double dg, double *stddev)
1873 double sigmafact=0.;
1875 double Wfac1, Wfac2;
1881 /* count the numbers of samples */
1885 /* to ensure the work values are the same as during the delta_G */
1886 /*if (!lambda_vec_same(ca->native_lambda, ca->foreign_lambda))*/
1887 if (ca->foreign_lambda->dhdl<0)
1889 /* this is the case when the delta U were calculated directly
1890 (i.e. we're not scaling dhdl) */
1896 /* we're using dhdl, so delta_lambda needs to be a
1897 multiplication factor. */
1898 double delta_lambda=lambda_vec_abs_diff(cb->native_lambda,
1900 Wfac1 = beta*delta_lambda;
1901 Wfac2 = -beta*delta_lambda;
1907 /* calculate average in both directions */
1908 for(i=0;i<ca->nsamples;i++)
1910 samples_t *s=ca->s[i];
1911 sample_range_t *r=&(ca->r[i]);
1916 for(j=r->start;j<r->end;j++)
1918 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1923 /* normalization factor multiplied with bin width and
1924 number of samples (we normalize through M): */
1927 int hd=0; /* histogram direction */
1928 if ( (s->hist->nhist>1) && (Wfac1<0) )
1934 for(j=0;j<s->hist->nbin[0];j++)
1936 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1937 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1939 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1944 for(i=0;i<cb->nsamples;i++)
1946 samples_t *s=cb->s[i];
1947 sample_range_t *r=&(cb->r[i]);
1952 for(j=r->start;j<r->end;j++)
1954 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1959 /* normalization factor multiplied with bin width and
1960 number of samples (we normalize through M): */
1963 int hd=0; /* histogram direction */
1964 if ( (s->hist->nhist>1) && (Wfac2<0) )
1970 for(j=0;j<s->hist->nbin[0];j++)
1972 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1973 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1975 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1981 sigmafact /= (n1 + n2);
1985 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1986 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1991 static void calc_bar(barres_t *br, double tol,
1992 int npee_min, int npee_max, gmx_bool *bEE,
1996 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1997 for calculated quantities */
1998 int nsample1, nsample2;
1999 double temp=br->a->temp;
2001 double dg_min, dg_max;
2002 gmx_bool have_hist=FALSE;
2004 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
2006 br->dg_disc_err = 0.;
2007 br->dg_histrange_err = 0.;
2009 /* check if there are histograms */
2010 for(i=0;i<br->a->nsamples;i++)
2012 if (br->a->r[i].use && br->a->s[i]->hist)
2020 for(i=0;i<br->b->nsamples;i++)
2022 if (br->b->r[i].use && br->b->s[i]->hist)
2030 /* calculate histogram-specific errors */
2033 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
2034 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
2036 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
2038 /* the histogram range error is the biggest of the differences
2039 between the best estimate and the extremes */
2040 br->dg_histrange_err = fabs(dg_max - dg_min);
2042 br->dg_disc_err = 0.;
2043 for(i=0;i<br->a->nsamples;i++)
2045 if (br->a->s[i]->hist)
2046 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
2048 for(i=0;i<br->b->nsamples;i++)
2050 if (br->b->s[i]->hist)
2051 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
2054 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
2056 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
2065 sample_coll_t ca, cb;
2067 /* initialize the samples */
2068 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
2070 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
2073 for(npee=npee_min; npee<=npee_max; npee++)
2082 double dstddev2 = 0;
2085 for(p=0; p<npee; p++)
2092 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
2093 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
2097 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
2100 sample_coll_destroy(&ca);
2102 sample_coll_destroy(&cb);
2106 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
2110 partsum[npee*(npee_max+1)+p] += dgp;
2112 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
2117 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
2120 dstddev2 += stddevc*stddevc;
2122 sample_coll_destroy(&ca);
2123 sample_coll_destroy(&cb);
2127 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
2133 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
2134 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
2138 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
2140 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
2141 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
2142 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
2143 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
2148 static double bar_err(int nbmin, int nbmax, const double *partsum)
2151 double svar,s,s2,dg;
2154 for(nb=nbmin; nb<=nbmax; nb++)
2160 dg = partsum[nb*(nbmax+1)+b];
2166 svar += (s2 - s*s)/(nb - 1);
2169 return sqrt(svar/(nbmax + 1 - nbmin));
2173 /* Seek the end of an identifier (consecutive non-spaces), followed by
2174 an optional number of spaces or '='-signs. Returns a pointer to the
2175 first non-space value found after that. Returns NULL if the string
2178 static const char *find_value(const char *str)
2180 gmx_bool name_end_found=FALSE;
2182 /* if the string is a NULL pointer, return a NULL pointer. */
2189 /* first find the end of the name */
2190 if (! name_end_found)
2192 if ( isspace(*str) || (*str == '=') )
2194 name_end_found=TRUE;
2199 if (! ( isspace(*str) || (*str == '=') ))
2211 /* read a vector-notation description of a lambda vector */
2212 static gmx_bool read_lambda_compvec(const char *str,
2214 const lambda_components_t *lc_in,
2215 lambda_components_t *lc_out,
2219 gmx_bool initialize_lc = FALSE; /* whether to initialize the lambda
2220 components, or to check them */
2221 gmx_bool start_reached=FALSE; /* whether the start of component names
2223 gmx_bool vector=FALSE; /* whether there are multiple components */
2224 int n = 0; /* current component number */
2225 const char *val_start=NULL; /* start of the component name, or NULL
2226 if not in a value */
2234 if (lc_out && lc_out->N == 0)
2236 initialize_lc = TRUE;
2257 else if (! isspace(*str))
2259 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2266 if (isspace(*str) || *str==')' || *str==',' || *str=='\0')
2273 lambda_components_add(lc_out, val_start,
2278 if (!lambda_components_check(lc_out, n, val_start,
2287 /* add a vector component to lv */
2288 lv->val[n] = strtod(val_start, &strtod_end);
2289 if (val_start == strtod_end)
2292 "Error reading lambda vector in %s", fn);
2295 /* reset for the next identifier */
2304 else if (isalnum(*str))
2315 gmx_fatal(FARGS, "Error in lambda components in %s", fn);
2323 else if (lv == NULL)
2329 gmx_fatal(FARGS, "Incomplete lambda vector data in %s",
2349 gmx_fatal(FARGS, "Incomplete lambda components data in %s", fn);
2355 /* read and check the component names from a string */
2356 static gmx_bool read_lambda_components(const char *str,
2357 lambda_components_t *lc,
2361 return read_lambda_compvec(str, NULL, NULL, lc, end, fn);
2364 /* read an initialized lambda vector from a string */
2365 static gmx_bool read_lambda_vector(const char *str,
2370 return read_lambda_compvec(str, lv, lv->lc, NULL, end, fn);
2375 /* deduce lambda value from legend.
2377 legend = the legend string
2379 lam = the initialized lambda vector
2380 returns whether to use the data in this set.
2382 static gmx_bool legend2lambda(const char *fn,
2388 const char *ptr=NULL, *ptr2=NULL;
2390 gmx_bool bdhdl=FALSE;
2391 const char *tostr=" to ";
2395 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
2398 /* look for the last 'to': */
2402 ptr2 = strstr(ptr2, tostr);
2409 while(ptr2!=NULL && *ptr2!= '\0' );
2413 ptr += strlen(tostr)-1; /* and advance past that 'to' */
2417 /* look for the = sign */
2418 ptr = strrchr(legend,'=');
2421 /* otherwise look for the last space */
2422 ptr = strrchr(legend,' ');
2426 if (strstr(legend,"dH"))
2431 else if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
2436 else /*if (strstr(legend, "pV"))*/
2447 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
2451 ptr=find_value(ptr);
2452 if (!ptr || !read_lambda_vector(ptr, lam, NULL, fn))
2454 gmx_fatal(FARGS, "lambda vector '%s' %s faulty", legend, fn);
2463 ptr = strrchr(legend, '=');
2467 /* there must be a component name */
2471 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2473 /* now backtrack to the start of the identifier */
2474 while(isspace(*ptr))
2480 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2483 while(!isspace(*ptr))
2488 gmx_fatal(FARGS, "dhdl legend '%s' %s faulty", legend, fn);
2492 strncpy(buf, ptr, (end-ptr));
2493 buf[(end-ptr)]='\0';
2494 dhdl_index=lambda_components_find(lam->lc, ptr, (end-ptr));
2498 strncpy(buf, ptr, (end-ptr));
2499 buf[(end-ptr)]='\0';
2501 "Did not find lambda component for '%s' in %s",
2510 "dhdl without component name with >1 lambda component in %s",
2515 lam->dhdl = dhdl_index;
2520 static gmx_bool subtitle2lambda(const char *subtitle, xvg_t *ba, const char *fn,
2521 lambda_components_t *lc)
2526 double native_lambda;
2530 /* first check for a state string */
2531 ptr = strstr(subtitle, "state");
2535 const char *val_end;
2537 /* the new 4.6 style lambda vectors */
2538 ptr = find_value(ptr);
2541 index = strtol(ptr, &end, 10);
2544 gmx_fatal(FARGS,"Incomplete state data in %s", fn);
2551 gmx_fatal(FARGS,"Incomplete state data in %s", fn);
2554 /* now find the lambda vector component names */
2555 while(*ptr!='(' && ! isalnum(*ptr))
2561 "Incomplete lambda vector component data in %s", fn);
2566 if (!read_lambda_components(ptr, lc, &val_end, fn))
2569 "lambda vector components in %s don't match those previously read",
2572 ptr=find_value(val_end);
2575 gmx_fatal(FARGS,"Incomplete state data in %s", fn);
2578 lambda_vec_init(&(ba->native_lambda), lc);
2579 if (!read_lambda_vector(ptr, &(ba->native_lambda), NULL, fn))
2581 gmx_fatal(FARGS, "lambda vector in %s faulty", fn);
2583 ba->native_lambda.index=index;
2588 /* compatibility mode: check for lambda in other ways. */
2589 /* plain text lambda string */
2590 ptr = strstr(subtitle,"lambda");
2593 /* xmgrace formatted lambda string */
2594 ptr = strstr(subtitle,"\\xl\\f{}");
2598 /* xmgr formatted lambda string */
2599 ptr = strstr(subtitle,"\\8l\\4");
2603 ptr = strstr(ptr,"=");
2607 bFound = (sscanf(ptr+1,"%lf",&(native_lambda)) == 1);
2608 /* add the lambda component name as an empty string */
2611 if (!lambda_components_check(lc, 0, "", 0))
2614 "lambda vector components in %s don't match those previously read",
2620 lambda_components_add(lc, "", 0);
2622 lambda_vec_init(&(ba->native_lambda), lc);
2623 ba->native_lambda.val[0]=native_lambda;
2630 static void filename2lambda(const char *fn, xvg_t *ba)
2633 const char *ptr,*digitptr;
2637 /* go to the end of the path string and search backward to find the last
2638 directory in the path which has to contain the value of lambda
2640 while (ptr[1] != '\0')
2644 /* searching backward to find the second directory separator */
2649 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
2651 if (dirsep == 1) break;
2654 /* save the last position of a digit between the last two
2655 separators = in the last dirname */
2656 if (dirsep > 0 && isdigit(*ptr))
2664 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
2665 " last directory in the path '%s' does not contain a number",fn);
2667 if (digitptr[-1] == '-')
2671 lambda = strtod(digitptr,&endptr);
2672 if (endptr == digitptr)
2674 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
2678 static void read_bar_xvg_lowlevel(const char *fn, real *temp, xvg_t *ba,
2679 lambda_components_t *lc)
2682 char *subtitle,**legend,*ptr;
2684 gmx_bool native_lambda_read=FALSE;
2692 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
2695 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
2697 /* Reorder the data */
2699 for(i=1; i<ba->nset; i++)
2701 ba->y[i-1] = ba->y[i];
2705 snew(ba->np,ba->nset);
2706 for(i=0;i<ba->nset;i++)
2710 if (subtitle != NULL)
2712 /* try to extract temperature */
2713 ptr = strstr(subtitle,"T =");
2717 if (sscanf(ptr,"%lf",&ba->temp) == 1)
2721 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
2731 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
2736 /* Try to deduce lambda from the subtitle */
2739 if (subtitle2lambda(subtitle,ba, fn, lc))
2741 native_lambda_read=TRUE;
2744 snew(ba->lambda,ba->nset);
2747 /* Check if we have a single set, no legend, nset=1 means t and dH/dl */
2750 if (!native_lambda_read)
2752 /* Deduce lambda from the file name */
2753 filename2lambda(fn, ba);
2754 native_lambda_read=TRUE;
2756 ba->lambda[0] = ba->native_lambda;
2760 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
2765 for(i=0; i<ba->nset; )
2768 /* Read lambda from the legend */
2769 lambda_vec_init( &(ba->lambda[i]), lc );
2770 lambda_vec_copy( &(ba->lambda[i]), &(ba->native_lambda));
2771 use=legend2lambda(fn,legend[i], ba, &(ba->lambda[i]));
2774 lambda_vec_print(&(ba->lambda[i]), buf, FALSE);
2780 printf("%s: Ignoring set '%s'.\n", fn, legend[i]);
2781 for(j=i+1; j<ba->nset; j++)
2783 ba->y[j-1] = ba->y[j];
2784 legend[j-1] = legend[j];
2791 if (!native_lambda_read)
2793 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2798 for(i=0; i<ba->nset-1; i++)
2806 static void read_bar_xvg(char *fn, real *temp, sim_data_t *sd)
2815 read_bar_xvg_lowlevel(fn, temp, barsim, &(sd->lc));
2817 if (barsim->nset <1 )
2819 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2822 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2824 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2828 /* now create a series of samples_t */
2829 snew(s, barsim->nset);
2830 for(i=0;i<barsim->nset;i++)
2832 samples_init(s+i, &(barsim->native_lambda), &(barsim->lambda[i]),
2833 barsim->temp, lambda_vec_same(&(barsim->native_lambda),
2834 &(barsim->lambda[i])),
2836 s[i].du=barsim->y[i];
2837 s[i].ndu=barsim->np[i];
2840 lambda_data_list_insert_sample(sd->lb, s+i);
2845 lambda_vec_print(s[0].native_lambda, buf, FALSE);
2846 printf("%s: %.1f - %.1f; lambda = %s\n dH/dl & foreign lambdas:\n",
2847 fn, s[0].t[0], s[0].t[s[0].ndu-1], buf);
2848 for(i=0;i<barsim->nset;i++)
2850 lambda_vec_print(s[i].foreign_lambda, buf, TRUE);
2851 printf(" %s (%d pts)\n", buf, s[i].ndu);
2857 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2858 double start_time, double delta_time,
2859 lambda_vec_t *native_lambda, double temp,
2860 double *last_t, const char *filename)
2864 double old_foreign_lambda;
2865 lambda_vec_t *foreign_lambda;
2867 samples_t *s; /* convenience pointer */
2870 /* check the block types etc. */
2871 if ( (blk->nsub < 3) ||
2872 (blk->sub[0].type != xdr_datatype_int) ||
2873 (blk->sub[1].type != xdr_datatype_double) ||
2875 (blk->sub[2].type != xdr_datatype_float) &&
2876 (blk->sub[2].type != xdr_datatype_double)
2878 (blk->sub[0].nr < 1) ||
2879 (blk->sub[1].nr < 1) )
2882 "Unexpected/corrupted block data in file %s around time %g.",
2883 filename, start_time);
2886 snew(foreign_lambda, 1);
2887 lambda_vec_init(foreign_lambda, native_lambda->lc);
2888 lambda_vec_copy(foreign_lambda, native_lambda);
2889 type = blk->sub[0].ival[0];
2892 for(i=0;i<native_lambda->lc->N;i++)
2894 foreign_lambda->val[i] = blk->sub[1].dval[i];
2899 if (blk->sub[0].nr > 1)
2901 foreign_lambda->dhdl = blk->sub[0].ival[1];
2904 foreign_lambda->dhdl = 0;
2909 /* initialize the samples structure if it's empty. */
2911 samples_init(*smp, native_lambda, foreign_lambda, temp,
2912 type==dhbtDHDL, filename);
2913 (*smp)->start_time=start_time;
2914 (*smp)->delta_time=delta_time;
2917 /* set convenience pointer */
2920 /* now double check */
2921 if ( ! lambda_vec_same(s->foreign_lambda, foreign_lambda) )
2923 char buf[STRLEN], buf2[STRLEN];
2924 lambda_vec_print(foreign_lambda, buf, FALSE);
2925 lambda_vec_print(s->foreign_lambda, buf2, FALSE);
2926 fprintf(stderr, "Got foreign lambda=%s, expected: %s\n", buf, buf2);
2927 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2928 filename, start_time);
2931 /* make room for the data */
2932 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2934 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2935 blk->sub[2].nr*2 : s->ndu_alloc;
2936 srenew(s->du_alloc, s->ndu_alloc);
2940 s->ndu += blk->sub[2].nr;
2941 s->ntot += blk->sub[2].nr;
2942 *ndu = blk->sub[2].nr;
2944 /* and copy the data*/
2945 for(j=0;j<blk->sub[2].nr;j++)
2947 if (blk->sub[2].type == xdr_datatype_float)
2949 s->du[startj+j] = blk->sub[2].fval[j];
2953 s->du[startj+j] = blk->sub[2].dval[j];
2956 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2958 *last_t = start_time + blk->sub[2].nr*delta_time;
2962 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2963 double start_time, double delta_time,
2964 lambda_vec_t *native_lambda, double temp,
2965 double *last_t, const char *filename)
2970 double old_foreign_lambda;
2971 lambda_vec_t *foreign_lambda;
2975 /* check the block types etc. */
2976 if ( (blk->nsub < 2) ||
2977 (blk->sub[0].type != xdr_datatype_double) ||
2978 (blk->sub[1].type != xdr_datatype_large_int) ||
2979 (blk->sub[0].nr < 2) ||
2980 (blk->sub[1].nr < 2) )
2983 "Unexpected/corrupted block data in file %s around time %g",
2984 filename, start_time);
2995 "Unexpected/corrupted block data in file %s around time %g",
2996 filename, start_time);
3002 snew(foreign_lambda, 1);
3003 lambda_vec_init(foreign_lambda, native_lambda->lc);
3004 lambda_vec_copy(foreign_lambda, native_lambda);
3005 type = (int)(blk->sub[1].lval[1]);
3008 double old_foreign_lambda;
3010 old_foreign_lambda=blk->sub[0].dval[0];
3011 if (old_foreign_lambda >= 0)
3013 foreign_lambda->val[0]=old_foreign_lambda;
3014 if (foreign_lambda->lc->N > 1)
3017 "Single-component lambda in multi-component file %s",
3023 for(i=0;i<native_lambda->lc->N;i++)
3025 foreign_lambda->val[i] = blk->sub[0].dval[i+2];
3031 if (foreign_lambda->lc->N > 1)
3033 if (blk->sub[1].nr < 3 + nhist)
3036 "Missing derivative coord in multi-component file %s",
3039 foreign_lambda->dhdl = blk->sub[1].lval[2 + nhist];
3043 foreign_lambda->dhdl = 0;
3047 samples_init(s, native_lambda, foreign_lambda, temp, type==dhbtDHDL,
3051 for(i=0;i<nhist;i++)
3053 nbins[i] = blk->sub[i+2].nr;
3056 hist_init(s->hist, nhist, nbins);
3058 for(i=0;i<nhist;i++)
3060 s->hist->x0[i] = blk->sub[1].lval[2+i];
3061 s->hist->dx[i] = blk->sub[0].dval[1];
3064 s->hist->dx[i] = - s->hist->dx[i];
3068 s->hist->start_time = start_time;
3069 s->hist->delta_time = delta_time;
3070 s->start_time = start_time;
3071 s->delta_time = delta_time;
3073 for(i=0;i<nhist;i++)
3076 gmx_large_int_t sum=0;
3078 for(j=0;j<s->hist->nbin[i];j++)
3080 int binv=(int)(blk->sub[i+2].ival[j]);
3082 s->hist->bin[i][j] = binv;
3095 gmx_fatal(FARGS, "Histogram counts don't match in %s",
3101 if (start_time + s->hist->sum*delta_time > *last_t)
3103 *last_t = start_time + s->hist->sum*delta_time;
3109 static void read_barsim_edr(char *fn, real *temp, sim_data_t *sd)
3115 gmx_enxnm_t *enm=NULL;
3118 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
3119 int *nhists=NULL; /* array to keep count & print at end */
3120 int *npts=NULL; /* array to keep count & print at end */
3121 lambda_vec_t **lambdas=NULL; /* array to keep count & print at end */
3122 lambda_vec_t *native_lambda;
3123 double end_time; /* the end time of the last batch of samples */
3125 lambda_vec_t start_lambda;
3127 fp = open_enx(fn,"r");
3128 do_enxnms(fp,&nre,&enm);
3131 snew(native_lambda, 1);
3132 start_lambda.lc = NULL;
3134 while(do_enx(fp, fr))
3136 /* count the data blocks */
3141 /* DHCOLL block information: */
3142 double start_time=0, delta_time=0, old_start_lambda=0, delta_lambda=0;
3145 /* count the blocks and handle collection information: */
3146 for(i=0;i<fr->nblock;i++)
3148 if (fr->block[i].id == enxDHHIST )
3150 if ( fr->block[i].id == enxDH )
3152 if (fr->block[i].id == enxDHCOLL )
3155 if ( (fr->block[i].nsub < 1) ||
3156 (fr->block[i].sub[0].type != xdr_datatype_double) ||
3157 (fr->block[i].sub[0].nr < 5))
3159 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
3162 /* read the data from the DHCOLL block */
3163 rtemp = fr->block[i].sub[0].dval[0];
3164 start_time = fr->block[i].sub[0].dval[1];
3165 delta_time = fr->block[i].sub[0].dval[2];
3166 old_start_lambda = fr->block[i].sub[0].dval[3];
3167 delta_lambda = fr->block[i].sub[0].dval[4];
3171 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
3173 if ( ( *temp != rtemp) && (*temp > 0) )
3175 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
3179 if (old_start_lambda >= 0)
3183 if (!lambda_components_check(&(sd->lc), 0, "", 0))
3186 "lambda vector components in %s don't match those previously read",
3192 lambda_components_add(&(sd->lc), "", 0);
3194 if (!start_lambda.lc)
3196 lambda_vec_init(&start_lambda, &(sd->lc));
3198 start_lambda.val[0]=old_start_lambda;
3202 /* read lambda vector */
3204 gmx_bool check=(sd->lc.N > 0);
3205 if (fr->block[i].nsub < 2)
3208 "No lambda vector, but start_lambda=%g\n",
3211 n_lambda_vec=fr->block[i].sub[1].ival[1];
3212 for(j=0;j<n_lambda_vec;j++)
3215 efpt_singular_names[fr->block[i].sub[1].ival[1+j]];
3218 /* check the components */
3219 lambda_components_check(&(sd->lc), j, name,
3224 lambda_components_add(&(sd->lc), name,
3228 lambda_vec_init(&start_lambda, &(sd->lc));
3229 start_lambda.index=fr->block[i].sub[1].ival[0];
3230 for(j=0;j<n_lambda_vec;j++)
3232 start_lambda.val[j]=fr->block[i].sub[0].dval[5+j];
3242 gmx_fatal(FARGS, "Did not find delta H information in file %s", fn);
3244 if (nblocks_raw > 0 && nblocks_hist > 0 )
3246 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
3251 /* check the native lambda */
3252 if (!lambda_vec_same(&start_lambda, native_lambda) )
3254 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
3255 fn, native_lambda, start_lambda, start_time);
3257 /* check the number of samples against the previous number */
3258 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
3260 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
3261 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
3263 /* check whether last iterations's end time matches with
3264 the currrent start time */
3265 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
3267 /* it didn't. We need to store our samples and reallocate */
3268 for(i=0;i<nsamples;i++)
3270 if (samples_rawdh[i])
3272 /* insert it into the existing list */
3273 lambda_data_list_insert_sample(sd->lb,
3275 /* and make sure we'll allocate a new one this time
3277 samples_rawdh[i]=NULL;
3284 /* this is the first round; allocate the associated data
3286 /*native_lambda=start_lambda;*/
3287 lambda_vec_init(native_lambda, &(sd->lc));
3288 lambda_vec_copy(native_lambda, &start_lambda);
3289 nsamples=nblocks_raw+nblocks_hist;
3290 snew(nhists, nsamples);
3291 snew(npts, nsamples);
3292 snew(lambdas, nsamples);
3293 snew(samples_rawdh, nsamples);
3294 for(i=0;i<nsamples;i++)
3299 samples_rawdh[i]=NULL; /* init to NULL so we know which
3300 ones contain values */
3305 k=0; /* counter for the lambdas, etc. arrays */
3306 for(i=0;i<fr->nblock;i++)
3308 if (fr->block[i].id == enxDH)
3310 int type=(fr->block[i].sub[0].ival[0]);
3311 if (type == dhbtDH || type == dhbtDHDL)
3314 read_edr_rawdh_block(&(samples_rawdh[k]),
3317 start_time, delta_time,
3318 native_lambda, rtemp,
3321 if (samples_rawdh[k])
3323 lambdas[k]=samples_rawdh[k]->foreign_lambda;
3328 else if (fr->block[i].id == enxDHHIST)
3330 int type=(int)(fr->block[i].sub[1].lval[1]);
3331 if (type == dhbtDH || type == dhbtDHDL)
3335 samples_t *s; /* this is where the data will go */
3336 s=read_edr_hist_block(&nb, &(fr->block[i]),
3337 start_time, delta_time,
3338 native_lambda, rtemp,
3343 lambdas[k]=s->foreign_lambda;
3346 /* and insert the new sample immediately */
3349 lambda_data_list_insert_sample(sd->lb, s+j);
3355 /* Now store all our extant sample collections */
3356 for(i=0;i<nsamples;i++)
3358 if (samples_rawdh[i])
3360 /* insert it into the existing list */
3361 lambda_data_list_insert_sample(sd->lb, samples_rawdh[i]);
3369 lambda_vec_print(native_lambda, buf, FALSE);
3370 printf("%s: %.1f - %.1f; lambda = %s\n foreign lambdas:\n",
3371 fn, first_t, last_t, buf);
3372 for(i=0;i<nsamples;i++)
3376 lambda_vec_print(lambdas[i], buf, TRUE);
3379 printf(" %s (%d hists)\n", buf, nhists[i]);
3383 printf(" %s (%d pts)\n", buf, npts[i]);
3395 int gmx_bar(int argc,char *argv[])
3397 static const char *desc[] = {
3398 "[TT]g_bar[tt] calculates free energy difference estimates through ",
3399 "Bennett's acceptance ratio method (BAR). It also automatically",
3400 "adds series of individual free energies obtained with BAR into",
3401 "a combined free energy estimate.[PAR]",
3403 "Every individual BAR free energy difference relies on two ",
3404 "simulations at different states: say state A and state B, as",
3405 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
3406 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
3407 "average of the Hamiltonian difference of state B given state A and",
3409 "The energy differences to the other state must be calculated",
3410 "explicitly during the simulation. This can be done with",
3411 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
3413 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
3414 "Two types of input files are supported:[BR]",
3415 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
3416 "The files should have columns ",
3417 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
3418 "The [GRK]lambda[grk] values are inferred ",
3419 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
3420 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
3421 "legends of Delta H",
3423 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
3424 "[TT]-extp[tt] option for these files, it is assumed",
3425 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
3426 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
3427 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
3428 "subtitle (if present), otherwise from a number in the subdirectory ",
3429 "in the file name.[PAR]",
3431 "The [GRK]lambda[grk] of the simulation is parsed from ",
3432 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
3433 "foreign [GRK]lambda[grk] values from the legend containing the ",
3434 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
3435 "the legend line containing 'T ='.[PAR]",
3437 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
3438 "These can contain either lists of energy differences (see the ",
3439 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
3440 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
3441 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
3442 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
3444 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
3445 "the energy difference can also be extrapolated from the ",
3446 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
3447 "option, which assumes that the system's Hamiltonian depends linearly",
3448 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
3450 "The free energy estimates are determined using BAR with bisection, ",
3451 "with the precision of the output set with [TT]-prec[tt]. ",
3452 "An error estimate taking into account time correlations ",
3453 "is made by splitting the data into blocks and determining ",
3454 "the free energy differences over those blocks and assuming ",
3455 "the blocks are independent. ",
3456 "The final error estimate is determined from the average variance ",
3457 "over 5 blocks. A range of block numbers for error estimation can ",
3458 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
3460 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
3461 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
3462 "samples. [BB]Note[bb] that when aggregating energy ",
3463 "differences/derivatives with different sampling intervals, this is ",
3464 "almost certainly not correct. Usually subsequent energies are ",
3465 "correlated and different time intervals mean different degrees ",
3466 "of correlation between samples.[PAR]",
3468 "The results are split in two parts: the last part contains the final ",
3469 "results in kJ/mol, together with the error estimate for each part ",
3470 "and the total. The first part contains detailed free energy ",
3471 "difference estimates and phase space overlap measures in units of ",
3472 "kT (together with their computed error estimate). The printed ",
3474 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
3475 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
3476 "[TT]*[tt] DG: the free energy estimate.[BR]",
3477 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
3478 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
3479 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
3481 "The relative entropy of both states in each other's ensemble can be ",
3482 "interpreted as a measure of phase space overlap: ",
3483 "the relative entropy s_A of the work samples of lambda_B in the ",
3484 "ensemble of lambda_A (and vice versa for s_B), is a ",
3485 "measure of the 'distance' between Boltzmann distributions of ",
3486 "the two states, that goes to zero for identical distributions. See ",
3487 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
3489 "The estimate of the expected per-sample standard deviation, as given ",
3490 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
3491 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
3492 "of the actual statistical error, because it assumes independent samples).[PAR]",
3494 "To get a visual estimate of the phase space overlap, use the ",
3495 "[TT]-oh[tt] option to write series of histograms, together with the ",
3496 "[TT]-nbin[tt] option.[PAR]"
3498 static real begin=0,end=-1,temp=-1;
3499 int nd=2,nbmin=5,nbmax=5;
3501 gmx_bool use_dhdl=FALSE;
3502 gmx_bool calc_s,calc_v;
3504 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
3505 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
3506 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
3507 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
3508 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
3509 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
3510 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
3511 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
3515 { efXVG, "-f", "dhdl", ffOPTRDMULT },
3516 { efEDR, "-g", "ener", ffOPTRDMULT },
3517 { efXVG, "-o", "bar", ffOPTWR },
3518 { efXVG, "-oi", "barint", ffOPTWR },
3519 { efXVG, "-oh", "histogram", ffOPTWR }
3521 #define NFILE asize(fnm)
3524 int nf=0; /* file counter */
3526 int nfile_tot; /* total number of input files */
3531 sim_data_t sim_data; /* the simulation data */
3532 barres_t *results; /* the results */
3533 int nresults; /* number of results in results array */
3536 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
3538 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN];
3539 char buf[STRLEN], buf2[STRLEN];
3540 char ktformat[STRLEN], sktformat[STRLEN];
3541 char kteformat[STRLEN], skteformat[STRLEN];
3544 gmx_bool result_OK=TRUE,bEE=TRUE;
3546 gmx_bool disc_err=FALSE;
3547 double sum_disc_err=0.; /* discretization error */
3548 gmx_bool histrange_err=FALSE;
3549 double sum_histrange_err=0.; /* histogram range error */
3550 double stat_err=0.; /* statistical error */
3552 parse_common_args(&argc,argv,
3554 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
3556 if (opt2bSet("-f", NFILE, fnm))
3558 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
3560 if (opt2bSet("-g", NFILE, fnm))
3562 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
3565 sim_data_init(&sim_data);
3567 /* make linked list */
3569 lambda_data_init(lb, 0, 0);
3575 nfile_tot = nxvgfile + nedrfile;
3579 gmx_fatal(FARGS,"No input files!");
3584 gmx_fatal(FARGS,"Can not have negative number of digits");
3588 snew(partsum,(nbmax+1)*(nbmax+1));
3591 /* read in all files. First xvg files */
3592 for(f=0; f<nxvgfile; f++)
3594 read_bar_xvg(fxvgnms[f],&temp,&sim_data);
3597 /* then .edr files */
3598 for(f=0; f<nedrfile; f++)
3600 read_barsim_edr(fedrnms[f],&temp,&sim_data);;
3604 /* fix the times to allow for equilibration */
3605 sim_data_impose_times(&sim_data, begin, end);
3607 if (opt2bSet("-oh",NFILE,fnm))
3609 sim_data_histogram(&sim_data, opt2fn("-oh",NFILE,fnm), nbin, oenv);
3612 /* assemble the output structures from the lambdas */
3613 results=barres_list_create(&sim_data, &nresults, use_dhdl);
3615 sum_disc_err=barres_list_max_disc_err(results, nresults);
3619 printf("\nNo results to calculate.\n");
3623 if (sum_disc_err > prec)
3626 nd = ceil(-log10(prec));
3627 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
3631 /*sprintf(lamformat,"%%6.3f");*/
3632 sprintf( dgformat,"%%%d.%df",3+nd,nd);
3633 /* the format strings of the results in kT */
3634 sprintf( ktformat,"%%%d.%df",5+nd,nd);
3635 sprintf( sktformat,"%%%ds",6+nd);
3636 /* the format strings of the errors in kT */
3637 sprintf( kteformat,"%%%d.%df",3+nd,nd);
3638 sprintf( skteformat,"%%%ds",4+nd);
3639 sprintf(xvg2format,"%s %s\n","%s",dgformat);
3640 sprintf(xvg3format,"%s %s %s\n","%s",dgformat,dgformat);
3645 if (opt2bSet("-o",NFILE,fnm))
3647 sprintf(buf,"%s (%s)","\\DeltaG","kT");
3648 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
3649 "\\lambda",buf,exvggtXYDY,oenv);
3653 if (opt2bSet("-oi",NFILE,fnm))
3655 sprintf(buf,"%s (%s)","\\DeltaG","kT");
3656 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
3657 "\\lambda",buf,oenv);
3665 /* first calculate results */
3668 for(f=0; f<nresults; f++)
3670 /* Determine the free energy difference with a factor of 10
3671 * more accuracy than requested for printing.
3673 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
3676 if (results[f].dg_disc_err > prec/10.)
3678 if (results[f].dg_histrange_err > prec/10.)
3682 /* print results in kT */
3686 printf("\nTemperature: %g K\n", temp);
3688 printf("\nDetailed results in kT (see help for explanation):\n\n");
3689 printf("%6s ", " lam_A");
3690 printf("%6s ", " lam_B");
3691 printf(sktformat, "DG ");
3693 printf(skteformat, "+/- ");
3695 printf(skteformat, "disc ");
3697 printf(skteformat, "range ");
3698 printf(sktformat, "s_A ");
3700 printf(skteformat, "+/- " );
3701 printf(sktformat, "s_B ");
3703 printf(skteformat, "+/- " );
3704 printf(sktformat, "stdev ");
3706 printf(skteformat, "+/- ");
3708 for(f=0; f<nresults; f++)
3710 lambda_vec_print_short(results[f].a->native_lambda, buf);
3712 lambda_vec_print_short(results[f].b->native_lambda, buf);
3714 printf(ktformat, results[f].dg);
3718 printf(kteformat, results[f].dg_err);
3723 printf(kteformat, results[f].dg_disc_err);
3728 printf(kteformat, results[f].dg_histrange_err);
3731 printf(ktformat, results[f].sa);
3735 printf(kteformat, results[f].sa_err);
3738 printf(ktformat, results[f].sb);
3742 printf(kteformat, results[f].sb_err);
3745 printf(ktformat, results[f].dg_stddev);
3749 printf(kteformat, results[f].dg_stddev_err);
3753 /* Check for negative relative entropy with a 95% certainty. */
3754 if (results[f].sa < -2*results[f].sa_err ||
3755 results[f].sb < -2*results[f].sb_err)
3763 printf("\nWARNING: Some of these results violate the Second Law of "
3764 "Thermodynamics: \n"
3765 " This is can be the result of severe undersampling, or "
3767 " there is something wrong with the simulations.\n");
3771 /* final results in kJ/mol */
3772 printf("\n\nFinal results in kJ/mol:\n\n");
3774 for(f=0; f<nresults; f++)
3779 lambda_vec_print_short(results[f].a->native_lambda, buf);
3780 fprintf(fpi, xvg2format, buf, dg_tot);
3786 lambda_vec_print_intermediate(results[f].a->native_lambda,
3787 results[f].b->native_lambda,
3790 fprintf(fpb, xvg3format, buf, results[f].dg,results[f].dg_err);
3794 lambda_vec_print_short(results[f].a->native_lambda, buf);
3795 lambda_vec_print_short(results[f].b->native_lambda, buf2);
3796 printf("%s - %s", buf, buf2);
3799 printf(dgformat,results[f].dg*kT);
3803 printf(dgformat,results[f].dg_err*kT);
3807 printf(" (max. range err. = ");
3808 printf(dgformat,results[f].dg_histrange_err*kT);
3810 sum_histrange_err += results[f].dg_histrange_err*kT;
3814 dg_tot += results[f].dg;
3818 lambda_vec_print_short(results[0].a->native_lambda, buf);
3819 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf2);
3820 printf("%s - %s", buf, buf2);
3823 printf(dgformat,dg_tot*kT);
3826 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
3828 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
3833 printf("\nmaximum discretization error = ");
3834 printf(dgformat,sum_disc_err);
3835 if (bEE && stat_err < sum_disc_err)
3837 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
3842 printf("\nmaximum histogram range error = ");
3843 printf(dgformat,sum_histrange_err);
3844 if (bEE && stat_err < sum_histrange_err)
3846 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
3855 lambda_vec_print_short(results[nresults-1].b->native_lambda, buf);
3856 fprintf(fpi, xvg2format, buf, dg_tot);
3860 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
3861 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");