1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
55 #include "gmx_fatal.h"
61 /* the dhdl.xvg data from a simulation (actually obsolete, but still
62 here for reading the dhdl.xvg file*/
66 int ftp; /* file type */
67 int nset; /* number of lambdas, including dhdl */
68 int *np; /* number of data points (du or hists) per lambda */
69 int np_alloc; /* number of points (du or hists) allocated */
70 double temp; /* temperature */
71 double *lambda; /* the lambdas (of first index for y). */
72 double *t; /* the times (of second index for y) */
73 double **y; /* the dU values. y[0] holds the derivative, while
74 further ones contain the energy differences between
75 the native lambda and the 'foreign' lambdas. */
77 double native_lambda; /* the native lambda */
79 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
86 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
87 double dx[2]; /* the histogram spacing. The reverse
88 dx is the negative of the forward dx.*/
89 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
92 int nbin[2]; /* the (forward+reverse) number of bins */
93 gmx_large_int_t sum; /* the total number of counts. Must be
94 the same for forward + reverse. */
95 int nhist; /* number of hist datas (forward or reverse) */
97 double start_time, delta_time; /* start time, end time of histogram */
101 /* an aggregate of samples for partial free energy calculation */
102 typedef struct samples_t
104 double native_lambda;
105 double foreign_lambda;
106 double temp; /* the temperature */
107 gmx_bool derivative; /* whether this sample is a derivative */
109 /* The samples come either as either delta U lists: */
110 int ndu; /* the number of delta U samples */
111 double *du; /* the delta u's */
112 double *t; /* the times associated with those samples, or: */
113 double start_time,delta_time;/*start time and delta time for linear time*/
115 /* or as histograms: */
116 hist_t *hist; /* a histogram */
118 /* allocation data: (not NULL for data 'owned' by this struct) */
119 double *du_alloc, *t_alloc; /* allocated delta u arrays */
120 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
121 hist_t *hist_alloc; /* allocated hist */
123 gmx_large_int_t ntot; /* total number of samples */
124 const char *filename; /* the file name this sample comes from */
127 /* a sample range (start to end for du-style data, or boolean
128 for both du-style data and histograms */
129 typedef struct sample_range_t
131 int start, end; /* start and end index for du style data */
132 gmx_bool use; /* whether to use this sample */
134 samples_t *s; /* the samples this range belongs to */
138 /* a collection of samples for a partial free energy calculation
139 (i.e. the collection of samples from one native lambda to one
141 typedef struct sample_coll_t
143 double native_lambda; /* these should be the same for all samples in the histogram?*/
144 double foreign_lambda; /* collection */
145 double temp; /* the temperature */
147 int nsamples; /* the number of samples */
148 samples_t **s; /* the samples themselves */
149 sample_range_t *r; /* the sample ranges */
150 int nsamples_alloc; /* number of allocated samples */
152 gmx_large_int_t ntot; /* total number of samples in the ranges of
155 struct sample_coll_t *next, *prev; /* next and previous in the list */
158 /* all the samples associated with a lambda point */
159 typedef struct lambda_t
161 double lambda; /* the native lambda (at start time if dynamic) */
162 double temp; /* temperature */
164 sample_coll_t *sc; /* the samples */
166 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
168 struct lambda_t *next, *prev; /* the next and prev in the list */
172 /* calculated values. */
174 sample_coll_t *a, *b; /* the simulation data */
176 double dg; /* the free energy difference */
177 double dg_err; /* the free energy difference */
179 double dg_disc_err; /* discretization error */
180 double dg_histrange_err; /* histogram range error */
182 double sa; /* relative entropy of b in state a */
183 double sa_err; /* error in sa */
184 double sb; /* relative entropy of a in state b */
185 double sb_err; /* error in sb */
187 double dg_stddev; /* expected dg stddev per sample */
188 double dg_stddev_err; /* error in dg_stddev */
194 static void hist_init(hist_t *h, int nhist, int *nbin)
199 gmx_fatal(FARGS, "histogram with more than two sets of data!");
203 snew(h->bin[i], nbin[i]);
206 h->start_time=h->delta_time=0;
213 static void hist_destroy(hist_t *h)
219 static void xvg_init(xvg_t *ba)
228 static void samples_init(samples_t *s, double native_lambda,
229 double foreign_lambda, double temp,
230 gmx_bool derivative, const char *filename)
232 s->native_lambda=native_lambda;
233 s->foreign_lambda=foreign_lambda;
235 s->derivative=derivative;
240 s->start_time = s->delta_time = 0;
249 s->filename=filename;
252 static void sample_range_init(sample_range_t *r, samples_t *s)
260 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
261 double foreign_lambda, double temp)
263 sc->native_lambda = native_lambda;
264 sc->foreign_lambda = foreign_lambda;
270 sc->nsamples_alloc=0;
273 sc->next=sc->prev=NULL;
276 static void sample_coll_destroy(sample_coll_t *sc)
278 /* don't free the samples themselves */
284 static void lambda_init(lambda_t *l, double native_lambda, double temp)
286 l->lambda=native_lambda;
294 sample_coll_init(l->sc, native_lambda, 0., 0.);
299 static void barres_init(barres_t *br)
317 static gmx_bool lambda_same(double lambda1, double lambda2)
319 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
322 /* calculate the total number of samples in a sample collection */
323 static void sample_coll_calc_ntot(sample_coll_t *sc)
328 for(i=0;i<sc->nsamples;i++)
334 sc->ntot += sc->s[i]->ntot;
338 sc->ntot += sc->r[i].end - sc->r[i].start;
345 /* find the barsamples_t associated with a lambda that corresponds to
346 a specific foreign lambda */
347 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
348 double foreign_lambda)
350 sample_coll_t *sc=l->sc->next;
354 if (lambda_same(sc->foreign_lambda,foreign_lambda))
364 /* insert li into an ordered list of lambda_colls */
365 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
367 sample_coll_t *scn=l->sc->next;
368 while ( (scn!=l->sc) )
370 if (scn->foreign_lambda > sc->foreign_lambda)
374 /* now insert it before the found scn */
381 /* insert li into an ordered list of lambdas */
382 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
384 lambda_t *lc=head->next;
387 if (lc->lambda > li->lambda)
391 /* now insert ourselves before the found lc */
398 /* insert a sample and a sample_range into a sample_coll. The
399 samples are stored as a pointer, the range is copied. */
400 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
403 /* first check if it belongs here */
404 if (sc->temp != s->temp)
406 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
407 s->filename, sc->next->s[0]->filename);
409 if (sc->native_lambda != s->native_lambda)
411 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
412 s->filename, sc->next->s[0]->filename);
414 if (sc->foreign_lambda != s->foreign_lambda)
416 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
417 s->filename, sc->next->s[0]->filename);
420 /* check if there's room */
421 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
423 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
424 srenew(sc->s, sc->nsamples_alloc);
425 srenew(sc->r, sc->nsamples_alloc);
427 sc->s[sc->nsamples]=s;
428 sc->r[sc->nsamples]=*r;
431 sample_coll_calc_ntot(sc);
434 /* insert a sample into a lambda_list, creating the right sample_coll if
436 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
438 gmx_bool found=FALSE;
442 lambda_t *l=head->next;
444 /* first search for the right lambda_t */
447 if (lambda_same(l->lambda, s->native_lambda) )
457 snew(l, 1); /* allocate a new one */
458 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
459 lambda_insert_lambda(head, l); /* add it to the list */
462 /* now look for a sample collection */
463 sc=lambda_find_sample_coll(l, s->foreign_lambda);
466 snew(sc, 1); /* allocate a new one */
467 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
468 lambda_insert_sample_coll(l, sc);
471 /* now insert the samples into the sample coll */
472 sample_range_init(&r, s);
473 sample_coll_insert_sample(sc, s, &r);
477 /* make a histogram out of a sample collection */
478 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
479 int *nbin_alloc, int *nbin,
480 double *dx, double *xmin, int nbin_default)
483 gmx_bool dx_set=FALSE;
484 gmx_bool xmin_set=FALSE;
486 gmx_bool xmax_set=FALSE;
487 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
488 limits of a histogram */
491 /* first determine dx and xmin; try the histograms */
492 for(i=0;i<sc->nsamples;i++)
496 hist_t *hist=sc->s[i]->hist;
497 for(k=0;k<hist->nhist;k++)
499 double hdx=hist->dx[k];
500 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
502 /* we use the biggest dx*/
503 if ( (!dx_set) || hist->dx[0] > *dx)
508 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
511 *xmin = (hist->x0[k]*hdx);
514 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
518 if (hist->bin[k][hist->nbin[k]-1] != 0)
521 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
529 /* and the delta us */
530 for(i=0;i<sc->nsamples;i++)
532 if (sc->s[i]->ndu > 0)
534 /* determine min and max */
535 int starti=sc->r[i].start;
536 int endi=sc->r[i].end;
537 double du_xmin=sc->s[i]->du[starti];
538 double du_xmax=sc->s[i]->du[starti];
539 for(j=starti+1;j<endi;j++)
541 if (sc->s[i]->du[j] < du_xmin)
542 du_xmin = sc->s[i]->du[j];
543 if (sc->s[i]->du[j] > du_xmax)
544 du_xmax = sc->s[i]->du[j];
547 /* and now change the limits */
548 if ( (!xmin_set) || (du_xmin < *xmin) )
553 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
561 if (!xmax_set || !xmin_set)
571 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
572 be 0, and we count from 0 */
576 *nbin=(xmax-(*xmin))/(*dx);
579 if (*nbin > *nbin_alloc)
582 srenew(*bin, *nbin_alloc);
585 /* reset the histogram */
586 for(i=0;i<(*nbin);i++)
591 /* now add the actual data */
592 for(i=0;i<sc->nsamples;i++)
596 hist_t *hist=sc->s[i]->hist;
597 for(k=0;k<hist->nhist;k++)
599 double hdx = hist->dx[k];
600 double xmin_hist=hist->x0[k]*hdx;
601 for(j=0;j<hist->nbin[k];j++)
603 /* calculate the bin corresponding to the middle of the
605 double x=hdx*(j+0.5) + xmin_hist;
606 int binnr=(int)((x-(*xmin))/(*dx));
608 if (binnr >= *nbin || binnr<0)
611 (*bin)[binnr] += hist->bin[k][j];
617 int starti=sc->r[i].start;
618 int endi=sc->r[i].end;
619 for(j=starti;j<endi;j++)
621 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
622 if (binnr >= *nbin || binnr<0)
631 /* write a collection of histograms to a file */
632 void lambdas_histogram(lambda_t *bl_head, const char *filename,
633 int nbin_default, const output_env_t oenv)
635 char label_x[STRLEN];
636 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
637 const char *title="N(\\DeltaH)";
638 const char *label_y="Samples";
642 char **setnames=NULL;
643 gmx_bool first_set=FALSE;
644 /* histogram data: */
652 printf("\nWriting histogram to %s\n", filename);
653 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
655 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
657 /* first get all the set names */
659 /* iterate over all lambdas */
662 sample_coll_t *sc=bl->sc->next;
664 /* iterate over all samples */
668 srenew(setnames, nsets);
669 snew(setnames[nsets-1], STRLEN);
670 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
672 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
673 deltag, lambda, sc->foreign_lambda, lambda,
678 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
679 dhdl, lambda, sc->native_lambda);
686 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
689 /* now make the histograms */
691 /* iterate over all lambdas */
694 sample_coll_t *sc=bl->sc->next;
696 /* iterate over all samples */
701 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
704 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
709 double xmin=i*dx + min;
710 double xmax=(i+1)*dx + min;
712 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
728 /* create a collection (array) of barres_t object given a ordered linked list
729 of barlamda_t sample collections */
730 static barres_t *barres_list_create(lambda_t *bl_head, int *nres,
740 /* first count the lambdas */
747 snew(res, nlambda-1);
749 /* next put the right samples in the res */
751 bl=bl_head->next->next; /* we start with the second one. */
754 sample_coll_t *sc, *scprev;
755 barres_t *br=&(res[*nres]);
756 /* there is always a previous one. we search for that as a foreign
758 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
759 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
767 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
768 sc=lambda_find_sample_coll(bl, bl->lambda);
772 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
777 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
780 else if (!scprev && !sc)
782 gmx_fatal(FARGS,"There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
788 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->lambda,bl->prev->lambda);
792 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->prev->lambda,bl->lambda);
804 /* estimate the maximum discretization error */
805 static double barres_list_max_disc_err(barres_t *res, int nres)
813 barres_t *br=&(res[i]);
815 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
817 for(j=0;j<br->a->nsamples;j++)
819 if (br->a->s[j]->hist)
822 if (br->a->s[j]->derivative)
825 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
828 for(j=0;j<br->b->nsamples;j++)
830 if (br->b->s[j]->hist)
833 if (br->b->s[j]->derivative)
835 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
843 /* impose start and end times on a sample collection, updating sample_ranges */
844 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
848 for(i=0;i<sc->nsamples;i++)
850 samples_t *s=sc->s[i];
851 sample_range_t *r=&(sc->r[i]);
854 double end_time=s->hist->delta_time*s->hist->sum +
856 if (s->hist->start_time < begin_t || end_time > end_t)
866 if (s->start_time < begin_t)
868 r->start=(int)((begin_t - s->start_time)/s->delta_time);
870 end_time=s->delta_time*s->ndu + s->start_time;
871 if (end_time > end_t)
873 r->end=(int)((end_t - s->start_time)/s->delta_time);
879 for(j=0;j<s->ndu;j++)
881 if (s->t[j] <begin_t)
886 if (s->t[j] >= end_t)
893 if (r->start > r->end)
899 sample_coll_calc_ntot(sc);
902 static void lambdas_impose_times(lambda_t *head, double begin, double end)
904 double first_t, last_t;
905 double begin_t, end_t;
909 if (begin<=0 && end<0)
914 /* first determine the global start and end times */
920 sample_coll_t *sc=lc->sc->next;
923 for(j=0;j<sc->nsamples;j++)
925 double start_t,end_t;
927 start_t = sc->s[j]->start_time;
928 end_t = sc->s[j]->start_time;
931 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
937 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
941 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
945 if (start_t < first_t || first_t<0)
959 /* calculate the actual times */
977 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
983 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
985 /* then impose them */
989 sample_coll_t *sc=lc->sc->next;
992 sample_coll_impose_times(sc, begin_t, end_t);
1000 /* create subsample i out of ni from an existing sample_coll */
1001 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1002 sample_coll_t *sc_orig,
1006 int hist_start, hist_end;
1008 gmx_large_int_t ntot_start;
1009 gmx_large_int_t ntot_end;
1010 gmx_large_int_t ntot_so_far;
1012 *sc = *sc_orig; /* just copy all fields */
1014 /* allocate proprietary memory */
1015 snew(sc->s, sc_orig->nsamples);
1016 snew(sc->r, sc_orig->nsamples);
1018 /* copy the samples */
1019 for(j=0;j<sc_orig->nsamples;j++)
1021 sc->s[j] = sc_orig->s[j];
1022 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1025 /* now fix start and end fields */
1026 /* the casts avoid possible overflows */
1027 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1028 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1030 for(j=0;j<sc->nsamples;j++)
1032 gmx_large_int_t ntot_add;
1033 gmx_large_int_t new_start, new_end;
1039 ntot_add = sc->s[j]->hist->sum;
1043 ntot_add = sc->r[j].end - sc->r[j].start;
1051 if (!sc->s[j]->hist)
1053 if (ntot_so_far < ntot_start)
1055 /* adjust starting point */
1056 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1060 new_start = sc->r[j].start;
1062 /* adjust end point */
1063 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1064 if (new_end > sc->r[j].end)
1066 new_end=sc->r[j].end;
1069 /* check if we're in range at all */
1070 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1075 /* and write the new range */
1076 sc->r[j].start=(int)new_start;
1077 sc->r[j].end=(int)new_end;
1084 double ntot_start_norm, ntot_end_norm;
1085 /* calculate the amount of overlap of the
1086 desired range (ntot_start -- ntot_end) onto
1087 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1089 /* first calculate normalized bounds
1090 (where 0 is the start of the hist range, and 1 the end) */
1091 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1092 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1094 /* now fix the boundaries */
1095 ntot_start_norm = min(1, max(0., ntot_start_norm));
1096 ntot_end_norm = max(0, min(1., ntot_end_norm));
1098 /* and calculate the overlap */
1099 overlap = ntot_end_norm - ntot_start_norm;
1101 if (overlap > 0.95) /* we allow for 5% slack */
1103 sc->r[j].use = TRUE;
1105 else if (overlap < 0.05)
1107 sc->r[j].use = FALSE;
1115 ntot_so_far += ntot_add;
1117 sample_coll_calc_ntot(sc);
1122 /* calculate minimum and maximum work values in sample collection */
1123 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1124 double *Wmin, double *Wmax)
1131 for(i=0;i<sc->nsamples;i++)
1133 samples_t *s=sc->s[i];
1134 sample_range_t *r=&(sc->r[i]);
1139 for(j=r->start; j<r->end; j++)
1141 *Wmin = min(*Wmin,s->du[j]*Wfac);
1142 *Wmax = max(*Wmax,s->du[j]*Wfac);
1147 int hd=0; /* determine the histogram direction: */
1149 if ( (s->hist->nhist>1) && (Wfac<0) )
1155 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1157 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1158 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1159 /* look for the highest value bin with values */
1160 if (s->hist->bin[hd][j]>0)
1162 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1163 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1173 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1182 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1188 /* calculate the BAR average given a histogram
1190 if type== 0, calculate the best estimate for the average,
1191 if type==-1, calculate the minimum possible value given the histogram
1192 if type== 1, calculate the maximum possible value given the histogram */
1193 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1199 /* normalization factor multiplied with bin width and
1200 number of samples (we normalize through M): */
1202 int hd=0; /* determine the histogram direction: */
1205 if ( (hist->nhist>1) && (Wfac<0) )
1210 max=hist->nbin[hd]-1;
1213 max=hist->nbin[hd]; /* we also add whatever was out of range */
1218 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1219 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1221 sum += pxdx/(1. + exp(x + sbMmDG));
1227 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1228 double temp, double tol, int type)
1233 double Wfac1,Wfac2,Wmin,Wmax;
1234 double DG0,DG1,DG2,dDG1;
1236 double n1, n2; /* numbers of samples as doubles */
1241 /* count the numbers of samples */
1247 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1249 /* this is the case when the delta U were calculated directly
1250 (i.e. we're not scaling dhdl) */
1256 /* we're using dhdl, so delta_lambda needs to be a
1257 multiplication factor. */
1258 double delta_lambda=cb->native_lambda-ca->native_lambda;
1259 Wfac1 = beta*delta_lambda;
1260 Wfac2 = -beta*delta_lambda;
1265 /* We print the output both in kT and kJ/mol.
1266 * Here we determine DG in kT, so when beta < 1
1267 * the precision has to be increased.
1272 /* Calculate minimum and maximum work to give an initial estimate of
1273 * delta G as their average.
1276 double Wmin1, Wmin2, Wmax1, Wmax2;
1277 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1278 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1280 Wmin=min(Wmin1, Wmin2);
1281 Wmax=max(Wmax1, Wmax2);
1289 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1291 /* We approximate by bisection: given our initial estimates
1292 we keep checking whether the halfway point is greater or
1293 smaller than what we get out of the BAR averages.
1295 For the comparison we can use twice the tolerance. */
1296 while (DG2 - DG0 > 2*tol)
1298 DG1 = 0.5*(DG0 + DG2);
1300 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1303 /* calculate the BAR averages */
1306 for(i=0;i<ca->nsamples;i++)
1308 samples_t *s=ca->s[i];
1309 sample_range_t *r=&(ca->r[i]);
1314 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1318 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1323 for(i=0;i<cb->nsamples;i++)
1325 samples_t *s=cb->s[i];
1326 sample_range_t *r=&(cb->r[i]);
1331 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1335 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1351 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1355 return 0.5*(DG0 + DG2);
1358 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1359 double temp, double dg, double *sa, double *sb)
1365 double Wfac1, Wfac2;
1371 /* count the numbers of samples */
1375 /* to ensure the work values are the same as during the delta_G */
1376 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1378 /* this is the case when the delta U were calculated directly
1379 (i.e. we're not scaling dhdl) */
1385 /* we're using dhdl, so delta_lambda needs to be a
1386 multiplication factor. */
1387 double delta_lambda=cb->native_lambda-ca->native_lambda;
1388 Wfac1 = beta*delta_lambda;
1389 Wfac2 = -beta*delta_lambda;
1392 /* first calculate the average work in both directions */
1393 for(i=0;i<ca->nsamples;i++)
1395 samples_t *s=ca->s[i];
1396 sample_range_t *r=&(ca->r[i]);
1401 for(j=r->start;j<r->end;j++)
1402 W_ab += Wfac1*s->du[j];
1406 /* normalization factor multiplied with bin width and
1407 number of samples (we normalize through M): */
1410 int hd=0; /* histogram direction */
1411 if ( (s->hist->nhist>1) && (Wfac1<0) )
1417 for(j=0;j<s->hist->nbin[0];j++)
1419 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1420 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1428 for(i=0;i<cb->nsamples;i++)
1430 samples_t *s=cb->s[i];
1431 sample_range_t *r=&(cb->r[i]);
1436 for(j=r->start;j<r->end;j++)
1437 W_ba += Wfac1*s->du[j];
1441 /* normalization factor multiplied with bin width and
1442 number of samples (we normalize through M): */
1445 int hd=0; /* histogram direction */
1446 if ( (s->hist->nhist>1) && (Wfac2<0) )
1452 for(j=0;j<s->hist->nbin[0];j++)
1454 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1455 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1463 /* then calculate the relative entropies */
1468 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1469 double temp, double dg, double *stddev)
1473 double sigmafact=0.;
1475 double Wfac1, Wfac2;
1481 /* count the numbers of samples */
1485 /* to ensure the work values are the same as during the delta_G */
1486 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1488 /* this is the case when the delta U were calculated directly
1489 (i.e. we're not scaling dhdl) */
1495 /* we're using dhdl, so delta_lambda needs to be a
1496 multiplication factor. */
1497 double delta_lambda=cb->native_lambda-ca->native_lambda;
1498 Wfac1 = beta*delta_lambda;
1499 Wfac2 = -beta*delta_lambda;
1505 /* calculate average in both directions */
1506 for(i=0;i<ca->nsamples;i++)
1508 samples_t *s=ca->s[i];
1509 sample_range_t *r=&(ca->r[i]);
1514 for(j=r->start;j<r->end;j++)
1516 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1521 /* normalization factor multiplied with bin width and
1522 number of samples (we normalize through M): */
1525 int hd=0; /* histogram direction */
1526 if ( (s->hist->nhist>1) && (Wfac1<0) )
1532 for(j=0;j<s->hist->nbin[0];j++)
1534 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1535 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1537 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1542 for(i=0;i<cb->nsamples;i++)
1544 samples_t *s=cb->s[i];
1545 sample_range_t *r=&(cb->r[i]);
1550 for(j=r->start;j<r->end;j++)
1552 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1557 /* normalization factor multiplied with bin width and
1558 number of samples (we normalize through M): */
1561 int hd=0; /* histogram direction */
1562 if ( (s->hist->nhist>1) && (Wfac2<0) )
1568 for(j=0;j<s->hist->nbin[0];j++)
1570 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1571 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1573 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1579 sigmafact /= (n1 + n2);
1583 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1584 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1589 static void calc_bar(barres_t *br, double tol,
1590 int npee_min, int npee_max, gmx_bool *bEE,
1594 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1595 for calculated quantities */
1596 int nsample1, nsample2;
1597 double temp=br->a->temp;
1599 double dg_min, dg_max;
1600 gmx_bool have_hist=FALSE;
1602 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1604 br->dg_disc_err = 0.;
1605 br->dg_histrange_err = 0.;
1607 /* check if there are histograms */
1608 for(i=0;i<br->a->nsamples;i++)
1610 if (br->a->r[i].use && br->a->s[i]->hist)
1618 for(i=0;i<br->b->nsamples;i++)
1620 if (br->b->r[i].use && br->b->s[i]->hist)
1628 /* calculate histogram-specific errors */
1631 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1632 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1634 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1636 /* the histogram range error is the biggest of the differences
1637 between the best estimate and the extremes */
1638 br->dg_histrange_err = fabs(dg_max - dg_min);
1640 br->dg_disc_err = 0.;
1641 for(i=0;i<br->a->nsamples;i++)
1643 if (br->a->s[i]->hist)
1644 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1646 for(i=0;i<br->b->nsamples;i++)
1648 if (br->b->s[i]->hist)
1649 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1652 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1654 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1663 sample_coll_t ca, cb;
1665 /* initialize the samples */
1666 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1668 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1671 for(npee=npee_min; npee<=npee_max; npee++)
1680 double dstddev2 = 0;
1683 for(p=0; p<npee; p++)
1690 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1691 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1695 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1698 sample_coll_destroy(&ca);
1700 sample_coll_destroy(&cb);
1704 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1708 partsum[npee*(npee_max+1)+p] += dgp;
1710 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1715 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1718 dstddev2 += stddevc*stddevc;
1720 sample_coll_destroy(&ca);
1721 sample_coll_destroy(&cb);
1725 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1731 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1732 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1736 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1738 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1739 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1740 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1741 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1746 static double bar_err(int nbmin, int nbmax, const double *partsum)
1749 double svar,s,s2,dg;
1752 for(nb=nbmin; nb<=nbmax; nb++)
1758 dg = partsum[nb*(nbmax+1)+b];
1764 svar += (s2 - s*s)/(nb - 1);
1767 return sqrt(svar/(nbmax + 1 - nbmin));
1770 /* deduce lambda value from legend.
1772 bdhdl = if true, value may be a derivative.
1774 bdhdl = whether the legend was for a derivative.
1776 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1784 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1786 ptr = strrchr(legend,' ');
1788 if (strstr(legend,"dH"))
1801 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1814 printf("%s\n", legend);
1815 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1817 if (sscanf(ptr,"%lf",&lambda) != 1)
1819 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1825 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1832 /* plain text lambda string */
1833 ptr = strstr(subtitle,"lambda");
1836 /* xmgrace formatted lambda string */
1837 ptr = strstr(subtitle,"\\xl\\f{}");
1841 /* xmgr formatted lambda string */
1842 ptr = strstr(subtitle,"\\8l\\4");
1846 ptr = strstr(ptr,"=");
1850 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1856 static double filename2lambda(char *fn)
1859 char *ptr,*endptr,*digitptr;
1862 /* go to the end of the path string and search backward to find the last
1863 directory in the path which has to contain the value of lambda
1865 while (ptr[1] != '\0')
1869 /* searching backward to find the second directory separator */
1874 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1876 if (dirsep == 1) break;
1879 /* save the last position of a digit between the last two
1880 separators = in the last dirname */
1881 if (dirsep > 0 && isdigit(*ptr))
1889 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1890 " last directory in the path '%s' does not contain a number",fn);
1892 if (digitptr[-1] == '-')
1896 lambda = strtod(digitptr,&endptr);
1897 if (endptr == digitptr)
1899 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1905 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1908 char *subtitle,**legend,*ptr;
1910 gmx_bool native_lambda_read=FALSE;
1916 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1919 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1923 snew(ba->np,ba->nset);
1924 for(i=0;i<ba->nset;i++)
1929 if (subtitle != NULL)
1931 ptr = strstr(subtitle,"T =");
1935 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1939 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1949 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1954 /* Try to deduce lambda from the subtitle */
1957 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1959 native_lambda_read=TRUE;
1962 snew(ba->lambda,ba->nset-1);
1965 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1968 if (!native_lambda_read)
1970 /* Deduce lambda from the file name */
1971 ba->native_lambda = filename2lambda(fn);
1972 native_lambda_read=TRUE;
1974 ba->lambda[0] = ba->native_lambda;
1978 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1983 for(i=0; i<ba->nset-1; i++)
1985 gmx_bool is_dhdl=(i==0);
1986 /* Read lambda from the legend */
1987 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
1989 if (is_dhdl && !native_lambda_read)
1991 ba->native_lambda = ba->lambda[i];
1992 native_lambda_read=TRUE;
1997 if (!native_lambda_read)
1999 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2002 /* Reorder the data */
2003 for(i=1; i<ba->nset; i++)
2005 ba->y[i-1] = ba->y[i];
2009 for(i=0; i<ba->nset-1; i++)
2018 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2027 read_bar_xvg_lowlevel(fn, temp, barsim);
2029 if (barsim->nset <1 )
2031 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2034 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2036 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2040 /* now create a series of samples_t */
2041 snew(s, barsim->nset);
2042 for(i=0;i<barsim->nset;i++)
2044 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2045 barsim->temp, lambda_same(barsim->native_lambda,
2048 s[i].du=barsim->y[i];
2049 s[i].ndu=barsim->np[i];
2052 lambda_list_insert_sample(lambda_head, s+i);
2054 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2055 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2056 for(i=0;i<barsim->nset;i++)
2058 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2063 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2064 double start_time, double delta_time,
2065 double native_lambda, double temp,
2066 double *last_t, const char *filename)
2070 double foreign_lambda;
2072 samples_t *s; /* convenience pointer */
2075 /* check the block types etc. */
2076 if ( (blk->nsub < 3) ||
2077 (blk->sub[0].type != xdr_datatype_int) ||
2078 (blk->sub[1].type != xdr_datatype_double) ||
2080 (blk->sub[2].type != xdr_datatype_float) &&
2081 (blk->sub[2].type != xdr_datatype_double)
2083 (blk->sub[0].nr < 1) ||
2084 (blk->sub[1].nr < 1) )
2087 "Unexpected/corrupted block data in file %s around time %g.",
2088 filename, start_time);
2091 derivative = blk->sub[0].ival[0];
2092 foreign_lambda = blk->sub[1].dval[0];
2096 /* initialize the samples structure if it's empty. */
2098 samples_init(*smp, native_lambda, foreign_lambda, temp,
2099 derivative!=0, filename);
2100 (*smp)->start_time=start_time;
2101 (*smp)->delta_time=delta_time;
2104 /* set convenience pointer */
2107 /* now double check */
2108 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2109 ( (derivative!=0) != (s->derivative!=0) ) )
2111 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2112 foreign_lambda, s->foreign_lambda);
2113 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2114 derivative, s->derivative);
2115 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2116 filename, start_time);
2119 /* make room for the data */
2120 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2122 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2123 blk->sub[2].nr*2 : s->ndu_alloc;
2124 srenew(s->du_alloc, s->ndu_alloc);
2128 s->ndu += blk->sub[2].nr;
2129 s->ntot += blk->sub[2].nr;
2130 *ndu = blk->sub[2].nr;
2132 /* and copy the data*/
2133 for(j=0;j<blk->sub[2].nr;j++)
2135 if (blk->sub[2].type == xdr_datatype_float)
2137 s->du[startj+j] = blk->sub[2].fval[j];
2141 s->du[startj+j] = blk->sub[2].dval[j];
2144 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2146 *last_t = start_time + blk->sub[2].nr*delta_time;
2150 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2151 double start_time, double delta_time,
2152 double native_lambda, double temp,
2153 double *last_t, const char *filename)
2158 double foreign_lambda;
2162 /* check the block types etc. */
2163 if ( (blk->nsub < 2) ||
2164 (blk->sub[0].type != xdr_datatype_double) ||
2165 (blk->sub[1].type != xdr_datatype_large_int) ||
2166 (blk->sub[0].nr < 2) ||
2167 (blk->sub[1].nr < 2) )
2170 "Unexpected/corrupted block data in file %s around time %g",
2171 filename, start_time);
2182 "Unexpected/corrupted block data in file %s around time %g",
2183 filename, start_time);
2189 foreign_lambda=blk->sub[0].dval[0];
2190 derivative=(int)(blk->sub[1].lval[1]);
2192 foreign_lambda=native_lambda;
2194 samples_init(s, native_lambda, foreign_lambda, temp,
2195 derivative!=0, filename);
2198 for(i=0;i<nhist;i++)
2200 nbins[i] = blk->sub[i+2].nr;
2203 hist_init(s->hist, nhist, nbins);
2205 for(i=0;i<nhist;i++)
2207 s->hist->x0[i]=blk->sub[1].lval[2+i];
2208 s->hist->dx[i] = blk->sub[0].dval[1];
2210 s->hist->dx[i] = - s->hist->dx[i];
2213 s->hist->start_time = start_time;
2214 s->hist->delta_time = delta_time;
2215 s->start_time = start_time;
2216 s->delta_time = delta_time;
2218 for(i=0;i<nhist;i++)
2221 gmx_large_int_t sum=0;
2223 for(j=0;j<s->hist->nbin[i];j++)
2225 int binv=(int)(blk->sub[i+2].ival[j]);
2227 s->hist->bin[i][j] = binv;
2240 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2246 if (start_time + s->hist->sum*delta_time > *last_t)
2248 *last_t = start_time + s->hist->sum*delta_time;
2254 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2260 gmx_enxnm_t *enm=NULL;
2263 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2264 int *nhists=NULL; /* array to keep count & print at end */
2265 int *npts=NULL; /* array to keep count & print at end */
2266 double *lambdas=NULL; /* array to keep count & print at end */
2267 double native_lambda=-1;
2268 double end_time; /* the end time of the last batch of samples */
2271 fp = open_enx(fn,"r");
2272 do_enxnms(fp,&nre,&enm);
2275 while(do_enx(fp, fr))
2277 /* count the data blocks */
2282 /* DHCOLL block information: */
2283 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2286 /* count the blocks and handle collection information: */
2287 for(i=0;i<fr->nblock;i++)
2289 if (fr->block[i].id == enxDHHIST )
2291 if ( fr->block[i].id == enxDH )
2293 if (fr->block[i].id == enxDHCOLL )
2296 if ( (fr->block[i].nsub < 1) ||
2297 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2298 (fr->block[i].sub[0].nr < 5))
2300 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2303 /* read the data from the DHCOLL block */
2304 rtemp = fr->block[i].sub[0].dval[0];
2305 start_time = fr->block[i].sub[0].dval[1];
2306 delta_time = fr->block[i].sub[0].dval[2];
2307 start_lambda = fr->block[i].sub[0].dval[3];
2308 delta_lambda = fr->block[i].sub[0].dval[4];
2312 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2314 if ( ( *temp != rtemp) && (*temp > 0) )
2316 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2327 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2329 if (nblocks_raw > 0 && nblocks_hist > 0 )
2331 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2336 /* check the native lambda */
2337 if (!lambda_same(start_lambda, native_lambda) )
2339 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2340 fn, native_lambda, start_lambda, start_time);
2342 /* check the number of samples against the previous number */
2343 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2345 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2346 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2348 /* check whether last iterations's end time matches with
2349 the currrent start time */
2350 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2352 /* it didn't. We need to store our samples and reallocate */
2353 for(i=0;i<nsamples;i++)
2355 if (samples_rawdh[i])
2357 /* insert it into the existing list */
2358 lambda_list_insert_sample(lambda_head,
2360 /* and make sure we'll allocate a new one this time
2362 samples_rawdh[i]=NULL;
2369 /* this is the first round; allocate the associated data
2371 native_lambda=start_lambda;
2372 nsamples=nblocks_raw+nblocks_hist;
2373 snew(nhists, nsamples);
2374 snew(npts, nsamples);
2375 snew(lambdas, nsamples);
2376 snew(samples_rawdh, nsamples);
2377 for(i=0;i<nsamples;i++)
2382 samples_rawdh[i]=NULL; /* init to NULL so we know which
2383 ones contain values */
2388 k=0; /* counter for the lambdas, etc. arrays */
2389 for(i=0;i<fr->nblock;i++)
2391 if (fr->block[i].id == enxDH)
2394 read_edr_rawdh_block(&(samples_rawdh[k]),
2397 start_time, delta_time,
2398 start_lambda, rtemp,
2401 if (samples_rawdh[k])
2403 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2407 else if (fr->block[i].id == enxDHHIST)
2411 samples_t *s; /* this is where the data will go */
2412 s=read_edr_hist_block(&nb, &(fr->block[i]),
2413 start_time, delta_time,
2414 start_lambda, rtemp,
2419 lambdas[k]= s->foreign_lambda;
2422 /* and insert the new sample immediately */
2425 lambda_list_insert_sample(lambda_head, s+j);
2430 /* Now store all our extant sample collections */
2431 for(i=0;i<nsamples;i++)
2433 if (samples_rawdh[i])
2435 /* insert it into the existing list */
2436 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2441 fprintf(stderr, "\n");
2442 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2443 fn, first_t, last_t, native_lambda);
2444 for(i=0;i<nsamples;i++)
2448 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2452 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2462 int gmx_bar(int argc,char *argv[])
2464 static const char *desc[] = {
2465 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2466 "Bennett's acceptance ratio method (BAR). It also automatically",
2467 "adds series of individual free energies obtained with BAR into",
2468 "a combined free energy estimate.[PAR]",
2470 "Every individual BAR free energy difference relies on two ",
2471 "simulations at different states: say state A and state B, as",
2472 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
2473 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2474 "average of the Hamiltonian difference of state B given state A and",
2476 "The energy differences to the other state must be calculated",
2477 "explicitly during the simulation. This can be done with",
2478 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
2480 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
2481 "Two types of input files are supported:[BR]",
2482 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
2483 "The files should have columns ",
2484 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
2485 "The [GRK]lambda[grk] values are inferred ",
2486 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
2487 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
2488 "legends of Delta H",
2490 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
2491 "[TT]-extp[tt] option for these files, it is assumed",
2492 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
2493 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
2494 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
2495 "subtitle (if present), otherwise from a number in the subdirectory ",
2496 "in the file name.[PAR]",
2498 "The [GRK]lambda[grk] of the simulation is parsed from ",
2499 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
2500 "foreign [GRK]lambda[grk] values from the legend containing the ",
2501 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
2502 "the legend line containing 'T ='.[PAR]",
2504 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
2505 "These can contain either lists of energy differences (see the ",
2506 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
2507 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
2508 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
2509 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
2511 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
2512 "the energy difference can also be extrapolated from the ",
2513 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
2514 "option, which assumes that the system's Hamiltonian depends linearly",
2515 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
2517 "The free energy estimates are determined using BAR with bisection, ",
2518 "with the precision of the output set with [TT]-prec[tt]. ",
2519 "An error estimate taking into account time correlations ",
2520 "is made by splitting the data into blocks and determining ",
2521 "the free energy differences over those blocks and assuming ",
2522 "the blocks are independent. ",
2523 "The final error estimate is determined from the average variance ",
2524 "over 5 blocks. A range of block numbers for error estimation can ",
2525 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2527 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
2528 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
2529 "samples. [BB]Note[bb] that when aggregating energy ",
2530 "differences/derivatives with different sampling intervals, this is ",
2531 "almost certainly not correct. Usually subsequent energies are ",
2532 "correlated and different time intervals mean different degrees ",
2533 "of correlation between samples.[PAR]",
2535 "The results are split in two parts: the last part contains the final ",
2536 "results in kJ/mol, together with the error estimate for each part ",
2537 "and the total. The first part contains detailed free energy ",
2538 "difference estimates and phase space overlap measures in units of ",
2539 "kT (together with their computed error estimate). The printed ",
2541 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
2542 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
2543 "[TT]*[tt] DG: the free energy estimate.[BR]",
2544 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2545 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2546 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2548 "The relative entropy of both states in each other's ensemble can be ",
2549 "interpreted as a measure of phase space overlap: ",
2550 "the relative entropy s_A of the work samples of lambda_B in the ",
2551 "ensemble of lambda_A (and vice versa for s_B), is a ",
2552 "measure of the 'distance' between Boltzmann distributions of ",
2553 "the two states, that goes to zero for identical distributions. See ",
2554 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
2556 "The estimate of the expected per-sample standard deviation, as given ",
2557 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2558 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2559 "of the actual statistical error, because it assumes independent samples).[PAR]",
2561 "To get a visual estimate of the phase space overlap, use the ",
2562 "[TT]-oh[tt] option to write series of histograms, together with the ",
2563 "[TT]-nbin[tt] option.[PAR]"
2565 static real begin=0,end=-1,temp=-1;
2566 int nd=2,nbmin=5,nbmax=5;
2568 gmx_bool use_dhdl=FALSE;
2569 gmx_bool calc_s,calc_v;
2571 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2572 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2573 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2574 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2575 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2576 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2577 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
2578 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
2582 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2583 { efEDR, "-g", "ener", ffOPTRDMULT },
2584 { efXVG, "-o", "bar", ffOPTWR },
2585 { efXVG, "-oi", "barint", ffOPTWR },
2586 { efXVG, "-oh", "histogram", ffOPTWR }
2588 #define NFILE asize(fnm)
2591 int nf=0; /* file counter */
2593 int nfile_tot; /* total number of input files */
2598 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2599 lambda_t lambda_head; /* the head element */
2600 barres_t *results; /* the results */
2601 int nresults; /* number of results in results array */
2604 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2607 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2608 char ktformat[STRLEN], sktformat[STRLEN];
2609 char kteformat[STRLEN], skteformat[STRLEN];
2612 gmx_bool result_OK=TRUE,bEE=TRUE;
2614 gmx_bool disc_err=FALSE;
2615 double sum_disc_err=0.; /* discretization error */
2616 gmx_bool histrange_err=FALSE;
2617 double sum_histrange_err=0.; /* histogram range error */
2618 double stat_err=0.; /* statistical error */
2620 CopyRight(stderr,argv[0]);
2621 parse_common_args(&argc,argv,
2623 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2625 if (opt2bSet("-f", NFILE, fnm))
2627 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2629 if (opt2bSet("-g", NFILE, fnm))
2631 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2634 /* make linked list */
2636 lambda_init(lb, 0, 0);
2641 nfile_tot = nxvgfile + nedrfile;
2645 gmx_fatal(FARGS,"No input files!");
2650 gmx_fatal(FARGS,"Can not have negative number of digits");
2654 snew(partsum,(nbmax+1)*(nbmax+1));
2657 /* read in all files. First xvg files */
2658 for(f=0; f<nxvgfile; f++)
2660 read_bar_xvg(fxvgnms[f],&temp,lb);
2663 /* then .edr files */
2664 for(f=0; f<nedrfile; f++)
2666 read_barsim_edr(fedrnms[f],&temp,lb);;
2670 /* fix the times to allow for equilibration */
2671 lambdas_impose_times(lb, begin, end);
2673 if (opt2bSet("-oh",NFILE,fnm))
2675 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2678 /* assemble the output structures from the lambdas */
2679 results=barres_list_create(lb, &nresults, use_dhdl);
2681 sum_disc_err=barres_list_max_disc_err(results, nresults);
2685 printf("\nNo results to calculate.\n");
2689 if (sum_disc_err > prec)
2692 nd = ceil(-log10(prec));
2693 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2697 sprintf(lamformat,"%%6.3f");
2698 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2699 /* the format strings of the results in kT */
2700 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2701 sprintf( sktformat,"%%%ds",6+nd);
2702 /* the format strings of the errors in kT */
2703 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2704 sprintf( skteformat,"%%%ds",4+nd);
2705 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2706 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2711 if (opt2bSet("-o",NFILE,fnm))
2713 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2714 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2715 "\\lambda",buf,exvggtXYDY,oenv);
2719 if (opt2bSet("-oi",NFILE,fnm))
2721 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2722 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2723 "\\lambda",buf,oenv);
2731 /* first calculate results */
2734 for(f=0; f<nresults; f++)
2736 /* Determine the free energy difference with a factor of 10
2737 * more accuracy than requested for printing.
2739 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2742 if (results[f].dg_disc_err > prec/10.)
2744 if (results[f].dg_histrange_err > prec/10.)
2748 /* print results in kT */
2752 printf("\nTemperature: %g K\n", temp);
2754 printf("\nDetailed results in kT (see help for explanation):\n\n");
2755 printf("%6s ", " lam_A");
2756 printf("%6s ", " lam_B");
2757 printf(sktformat, "DG ");
2759 printf(skteformat, "+/- ");
2761 printf(skteformat, "disc ");
2763 printf(skteformat, "range ");
2764 printf(sktformat, "s_A ");
2766 printf(skteformat, "+/- " );
2767 printf(sktformat, "s_B ");
2769 printf(skteformat, "+/- " );
2770 printf(sktformat, "stdev ");
2772 printf(skteformat, "+/- ");
2774 for(f=0; f<nresults; f++)
2776 printf(lamformat, results[f].a->native_lambda);
2778 printf(lamformat, results[f].b->native_lambda);
2780 printf(ktformat, results[f].dg);
2784 printf(kteformat, results[f].dg_err);
2789 printf(kteformat, results[f].dg_disc_err);
2794 printf(kteformat, results[f].dg_histrange_err);
2797 printf(ktformat, results[f].sa);
2801 printf(kteformat, results[f].sa_err);
2804 printf(ktformat, results[f].sb);
2808 printf(kteformat, results[f].sb_err);
2811 printf(ktformat, results[f].dg_stddev);
2815 printf(kteformat, results[f].dg_stddev_err);
2819 /* Check for negative relative entropy with a 95% certainty. */
2820 if (results[f].sa < -2*results[f].sa_err ||
2821 results[f].sb < -2*results[f].sb_err)
2829 printf("\nWARNING: Some of these results violate the Second Law of "
2830 "Thermodynamics: \n"
2831 " This is can be the result of severe undersampling, or "
2833 " there is something wrong with the simulations.\n");
2837 /* final results in kJ/mol */
2838 printf("\n\nFinal results in kJ/mol:\n\n");
2840 for(f=0; f<nresults; f++)
2845 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2851 fprintf(fpb, xvg3format,
2852 0.5*(results[f].a->native_lambda +
2853 results[f].b->native_lambda),
2854 results[f].dg,results[f].dg_err);
2857 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2858 results[f].lambda_b);*/
2860 printf(lamformat, results[f].a->native_lambda);
2862 printf(lamformat, results[f].b->native_lambda);
2865 printf(dgformat,results[f].dg*kT);
2869 printf(dgformat,results[f].dg_err*kT);
2873 printf(" (max. range err. = ");
2874 printf(dgformat,results[f].dg_histrange_err*kT);
2876 sum_histrange_err += results[f].dg_histrange_err*kT;
2880 dg_tot += results[f].dg;
2884 printf(lamformat, results[0].a->native_lambda);
2886 printf(lamformat, results[nresults-1].b->native_lambda);
2889 printf(dgformat,dg_tot*kT);
2892 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2894 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2899 printf("\nmaximum discretization error = ");
2900 printf(dgformat,sum_disc_err);
2901 if (bEE && stat_err < sum_disc_err)
2903 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2908 printf("\nmaximum histogram range error = ");
2909 printf(dgformat,sum_histrange_err);
2910 if (bEE && stat_err < sum_histrange_err)
2912 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2921 fprintf(fpi, xvg2format,
2922 results[nresults-1].b->native_lambda, dg_tot);
2926 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2927 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");