1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
54 #include "gmx_fatal.h"
59 /* Suppress Cygwin compiler warnings from using newlib version of
65 /* the dhdl.xvg data from a simulation (actually obsolete, but still
66 here for reading the dhdl.xvg file*/
70 int ftp; /* file type */
71 int nset; /* number of lambdas, including dhdl */
72 int *np; /* number of data points (du or hists) per lambda */
73 int np_alloc; /* number of points (du or hists) allocated */
74 double temp; /* temperature */
75 double *lambda; /* the lambdas (of first index for y). */
76 double *t; /* the times (of second index for y) */
77 double **y; /* the dU values. y[0] holds the derivative, while
78 further ones contain the energy differences between
79 the native lambda and the 'foreign' lambdas. */
81 double native_lambda; /* the native lambda */
83 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
90 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
91 double dx[2]; /* the histogram spacing. The reverse
92 dx is the negative of the forward dx.*/
93 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
96 int nbin[2]; /* the (forward+reverse) number of bins */
97 gmx_large_int_t sum; /* the total number of counts. Must be
98 the same for forward + reverse. */
99 int nhist; /* number of hist datas (forward or reverse) */
101 double start_time, delta_time; /* start time, end time of histogram */
105 /* an aggregate of samples for partial free energy calculation */
106 typedef struct samples_t
108 double native_lambda;
109 double foreign_lambda;
110 double temp; /* the temperature */
111 gmx_bool derivative; /* whether this sample is a derivative */
113 /* The samples come either as either delta U lists: */
114 int ndu; /* the number of delta U samples */
115 double *du; /* the delta u's */
116 double *t; /* the times associated with those samples, or: */
117 double start_time,delta_time;/*start time and delta time for linear time*/
119 /* or as histograms: */
120 hist_t *hist; /* a histogram */
122 /* allocation data: (not NULL for data 'owned' by this struct) */
123 double *du_alloc, *t_alloc; /* allocated delta u arrays */
124 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
125 hist_t *hist_alloc; /* allocated hist */
127 gmx_large_int_t ntot; /* total number of samples */
128 const char *filename; /* the file name this sample comes from */
131 /* a sample range (start to end for du-style data, or boolean
132 for both du-style data and histograms */
133 typedef struct sample_range_t
135 int start, end; /* start and end index for du style data */
136 gmx_bool use; /* whether to use this sample */
138 samples_t *s; /* the samples this range belongs to */
142 /* a collection of samples for a partial free energy calculation
143 (i.e. the collection of samples from one native lambda to one
145 typedef struct sample_coll_t
147 double native_lambda; /* these should be the same for all samples in the */
148 double foreign_lambda; /* collection */
149 double temp; /* the temperature */
151 int nsamples; /* the number of samples */
152 samples_t **s; /* the samples themselves */
153 sample_range_t *r; /* the sample ranges */
154 int nsamples_alloc; /* number of allocated samples */
156 gmx_large_int_t ntot; /* total number of samples in the ranges of
159 struct sample_coll_t *next, *prev; /* next and previous in the list */
162 /* all the samples associated with a lambda point */
163 typedef struct lambda_t
165 double lambda; /* the native lambda (at start time if dynamic) */
166 double temp; /* temperature */
168 sample_coll_t *sc; /* the samples */
170 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
172 struct lambda_t *next, *prev; /* the next and prev in the list */
176 /* calculated values. */
178 sample_coll_t *a, *b; /* the simulation data */
180 double dg; /* the free energy difference */
181 double dg_err; /* the free energy difference */
183 double dg_disc_err; /* discretization error */
184 double dg_histrange_err; /* histogram range error */
186 double sa; /* relative entropy of b in state a */
187 double sa_err; /* error in sa */
188 double sb; /* relative entropy of a in state b */
189 double sb_err; /* error in sb */
191 double dg_stddev; /* expected dg stddev per sample */
192 double dg_stddev_err; /* error in dg_stddev */
198 static void hist_init(hist_t *h, int nhist, int *nbin)
203 gmx_fatal(FARGS, "histogram with more than two sets of data!");
207 snew(h->bin[i], nbin[i]);
210 h->start_time=h->delta_time=0;
217 static void hist_destroy(hist_t *h)
223 static void xvg_init(xvg_t *ba)
232 static void samples_init(samples_t *s, double native_lambda,
233 double foreign_lambda, double temp,
234 gmx_bool derivative, const char *filename)
236 s->native_lambda=native_lambda;
237 s->foreign_lambda=foreign_lambda;
239 s->derivative=derivative;
244 s->start_time = s->delta_time = 0;
253 s->filename=filename;
256 static void sample_range_init(sample_range_t *r, samples_t *s)
264 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
265 double foreign_lambda, double temp)
267 sc->native_lambda = native_lambda;
268 sc->foreign_lambda = foreign_lambda;
274 sc->nsamples_alloc=0;
277 sc->next=sc->prev=NULL;
280 static void sample_coll_destroy(sample_coll_t *sc)
282 /* don't free the samples themselves */
288 static void lambda_init(lambda_t *l, double native_lambda, double temp)
290 l->lambda=native_lambda;
298 sample_coll_init(l->sc, native_lambda, 0., 0.);
303 static void barres_init(barres_t *br)
321 static gmx_bool lambda_same(double lambda1, double lambda2)
323 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
326 /* calculate the total number of samples in a sample collection */
327 static void sample_coll_calc_ntot(sample_coll_t *sc)
332 for(i=0;i<sc->nsamples;i++)
338 sc->ntot += sc->s[i]->ntot;
342 sc->ntot += sc->r[i].end - sc->r[i].start;
349 /* find the barsamples_t associated with a lambda that corresponds to
350 a specific foreign lambda */
351 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
352 double foreign_lambda)
354 sample_coll_t *sc=l->sc->next;
358 if (lambda_same(sc->foreign_lambda,foreign_lambda))
368 /* insert li into an ordered list of lambda_colls */
369 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
371 sample_coll_t *scn=l->sc->next;
372 while ( (scn!=l->sc) )
374 if (scn->foreign_lambda > sc->foreign_lambda)
378 /* now insert it before the found scn */
385 /* insert li into an ordered list of lambdas */
386 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
388 lambda_t *lc=head->next;
391 if (lc->lambda > li->lambda)
395 /* now insert ourselves before the found lc */
402 /* insert a sample and a sample_range into a sample_coll. The
403 samples are stored as a pointer, the range is copied. */
404 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
407 /* first check if it belongs here */
408 if (sc->temp != s->temp)
410 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
411 s->filename, sc->next->s[0]->filename);
413 if (sc->native_lambda != s->native_lambda)
415 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
416 s->filename, sc->next->s[0]->filename);
418 if (sc->foreign_lambda != s->foreign_lambda)
420 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
421 s->filename, sc->next->s[0]->filename);
424 /* check if there's room */
425 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
427 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
428 srenew(sc->s, sc->nsamples_alloc);
429 srenew(sc->r, sc->nsamples_alloc);
431 sc->s[sc->nsamples]=s;
432 sc->r[sc->nsamples]=*r;
435 sample_coll_calc_ntot(sc);
438 /* insert a sample into a lambda_list, creating the right sample_coll if
440 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
442 gmx_bool found=FALSE;
446 lambda_t *l=head->next;
448 /* first search for the right lambda_t */
451 if (lambda_same(l->lambda, s->native_lambda) )
461 snew(l, 1); /* allocate a new one */
462 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
463 lambda_insert_lambda(head, l); /* add it to the list */
466 /* now look for a sample collection */
467 sc=lambda_find_sample_coll(l, s->foreign_lambda);
470 snew(sc, 1); /* allocate a new one */
471 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
472 lambda_insert_sample_coll(l, sc);
475 /* now insert the samples into the sample coll */
476 sample_range_init(&r, s);
477 sample_coll_insert_sample(sc, s, &r);
481 /* make a histogram out of a sample collection */
482 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
483 int *nbin_alloc, int *nbin,
484 double *dx, double *xmin, int nbin_default)
487 gmx_bool dx_set=FALSE;
488 gmx_bool xmin_set=FALSE;
490 gmx_bool xmax_set=FALSE;
491 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
492 limits of a histogram */
495 /* first determine dx and xmin; try the histograms */
496 for(i=0;i<sc->nsamples;i++)
500 hist_t *hist=sc->s[i]->hist;
501 for(k=0;k<hist->nhist;k++)
503 double hdx=hist->dx[k];
504 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
506 /* we use the biggest dx*/
507 if ( (!dx_set) || hist->dx[0] > *dx)
512 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
515 *xmin = (hist->x0[k]*hdx);
518 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
522 if (hist->bin[k][hist->nbin[k]-1] != 0)
525 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
533 /* and the delta us */
534 for(i=0;i<sc->nsamples;i++)
536 if (sc->s[i]->ndu > 0)
538 /* determine min and max */
539 int starti=sc->r[i].start;
540 int endi=sc->r[i].end;
541 double du_xmin=sc->s[i]->du[starti];
542 double du_xmax=sc->s[i]->du[starti];
543 for(j=starti+1;j<endi;j++)
545 if (sc->s[i]->du[j] < du_xmin)
546 du_xmin = sc->s[i]->du[j];
547 if (sc->s[i]->du[j] > du_xmax)
548 du_xmax = sc->s[i]->du[j];
551 /* and now change the limits */
552 if ( (!xmin_set) || (du_xmin < *xmin) )
557 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
565 if (!xmax_set || !xmin_set)
575 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
576 be 0, and we count from 0 */
580 *nbin=(xmax-(*xmin))/(*dx);
583 if (*nbin > *nbin_alloc)
586 srenew(*bin, *nbin_alloc);
589 /* reset the histogram */
590 for(i=0;i<(*nbin);i++)
595 /* now add the actual data */
596 for(i=0;i<sc->nsamples;i++)
600 hist_t *hist=sc->s[i]->hist;
601 for(k=0;k<hist->nhist;k++)
603 double hdx = hist->dx[k];
604 double xmin_hist=hist->x0[k]*hdx;
605 for(j=0;j<hist->nbin[k];j++)
607 /* calculate the bin corresponding to the middle of the
609 double x=hdx*(j+0.5) + xmin_hist;
610 int binnr=(int)((x-(*xmin))/(*dx));
612 if (binnr >= *nbin || binnr<0)
615 (*bin)[binnr] += hist->bin[k][j];
621 int starti=sc->r[i].start;
622 int endi=sc->r[i].end;
623 for(j=starti;j<endi;j++)
625 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
626 if (binnr >= *nbin || binnr<0)
635 /* write a collection of histograms to a file */
636 void lambdas_histogram(lambda_t *bl_head, const char *filename,
637 int nbin_default, const output_env_t oenv)
639 char label_x[STRLEN];
640 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
641 const char *title="N(\\DeltaH)";
642 const char *label_y="Samples";
646 char **setnames=NULL;
647 gmx_bool first_set=FALSE;
648 /* histogram data: */
656 printf("\nWriting histogram to %s\n", filename);
657 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
659 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
661 /* first get all the set names */
663 /* iterate over all lambdas */
666 sample_coll_t *sc=bl->sc->next;
668 /* iterate over all samples */
672 srenew(setnames, nsets);
673 snew(setnames[nsets-1], STRLEN);
674 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
676 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
677 deltag, lambda, sc->foreign_lambda, lambda,
682 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
683 dhdl, lambda, sc->native_lambda);
690 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
693 /* now make the histograms */
695 /* iterate over all lambdas */
698 sample_coll_t *sc=bl->sc->next;
700 /* iterate over all samples */
705 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
708 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
713 double xmin=i*dx + min;
714 double xmax=(i+1)*dx + min;
716 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
732 /* create a collection (array) of barres_t object given a ordered linked list
733 of barlamda_t sample collections */
734 static barres_t *barres_list_create(lambda_t *bl_head, int *nres)
743 /* first count the lambdas */
750 snew(res, nlambda-1);
752 /* next put the right samples in the res */
754 bl=bl_head->next->next; /* we start with the second one. */
757 sample_coll_t *sc, *scprev;
758 barres_t *br=&(res[*nres]);
759 /* there is always a previous one. we search for that as a foreign
761 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
762 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
770 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
771 sc=lambda_find_sample_coll(bl, bl->lambda);
775 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
780 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
787 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->lambda,bl->prev->lambda);
791 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->prev->lambda,bl->lambda);
803 /* estimate the maximum discretization error */
804 static double barres_list_max_disc_err(barres_t *res, int nres)
812 barres_t *br=&(res[i]);
814 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
816 for(j=0;j<br->a->nsamples;j++)
818 if (br->a->s[j]->hist)
821 if (br->a->s[j]->derivative)
824 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
827 for(j=0;j<br->b->nsamples;j++)
829 if (br->b->s[j]->hist)
832 if (br->b->s[j]->derivative)
834 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
842 /* impose start and end times on a sample collection, updating sample_ranges */
843 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
847 for(i=0;i<sc->nsamples;i++)
849 samples_t *s=sc->s[i];
850 sample_range_t *r=&(sc->r[i]);
853 double end_time=s->hist->delta_time*s->hist->sum +
855 if (s->hist->start_time < begin_t || end_time > end_t)
865 if (s->start_time < begin_t)
867 r->start=(int)((begin_t - s->start_time)/s->delta_time);
869 end_time=s->delta_time*s->ndu + s->start_time;
870 if (end_time > end_t)
872 r->end=(int)((end_t - s->start_time)/s->delta_time);
878 for(j=0;j<s->ndu;j++)
880 if (s->t[j] <begin_t)
885 if (s->t[j] >= end_t)
892 if (r->start > r->end)
898 sample_coll_calc_ntot(sc);
901 static void lambdas_impose_times(lambda_t *head, double begin, double end)
903 double first_t, last_t;
904 double begin_t, end_t;
908 if (begin<=0 && end<0)
913 /* first determine the global start and end times */
919 sample_coll_t *sc=lc->sc->next;
922 for(j=0;j<sc->nsamples;j++)
924 double start_t,end_t;
926 start_t = sc->s[j]->start_time;
927 end_t = sc->s[j]->start_time;
930 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
936 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
940 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
944 if (start_t < first_t || first_t<0)
958 /* calculate the actual times */
976 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
982 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
984 /* then impose them */
988 sample_coll_t *sc=lc->sc->next;
991 sample_coll_impose_times(sc, begin_t, end_t);
999 /* create subsample i out of ni from an existing sample_coll */
1000 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1001 sample_coll_t *sc_orig,
1005 int hist_start, hist_end;
1007 gmx_large_int_t ntot_start;
1008 gmx_large_int_t ntot_end;
1009 gmx_large_int_t ntot_so_far;
1011 *sc = *sc_orig; /* just copy all fields */
1013 /* allocate proprietary memory */
1014 snew(sc->s, sc_orig->nsamples);
1015 snew(sc->r, sc_orig->nsamples);
1017 /* copy the samples */
1018 for(j=0;j<sc_orig->nsamples;j++)
1020 sc->s[j] = sc_orig->s[j];
1021 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1024 /* now fix start and end fields */
1025 /* the casts avoid possible overflows */
1026 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1027 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1029 for(j=0;j<sc->nsamples;j++)
1031 gmx_large_int_t ntot_add;
1032 gmx_large_int_t new_start, new_end;
1038 ntot_add = sc->s[j]->hist->sum;
1042 ntot_add = sc->r[j].end - sc->r[j].start;
1050 if (!sc->s[j]->hist)
1052 if (ntot_so_far < ntot_start)
1054 /* adjust starting point */
1055 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1059 new_start = sc->r[j].start;
1061 /* adjust end point */
1062 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1063 if (new_end > sc->r[j].end)
1065 new_end=sc->r[j].end;
1068 /* check if we're in range at all */
1069 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1074 /* and write the new range */
1075 sc->r[j].start=(int)new_start;
1076 sc->r[j].end=(int)new_end;
1083 double ntot_start_norm, ntot_end_norm;
1084 /* calculate the amount of overlap of the
1085 desired range (ntot_start -- ntot_end) onto
1086 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1088 /* first calculate normalized bounds
1089 (where 0 is the start of the hist range, and 1 the end) */
1090 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1091 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1093 /* now fix the boundaries */
1094 ntot_start_norm = min(1, max(0., ntot_start_norm));
1095 ntot_end_norm = max(0, min(1., ntot_end_norm));
1097 /* and calculate the overlap */
1098 overlap = ntot_end_norm - ntot_start_norm;
1100 if (overlap > 0.95) /* we allow for 5% slack */
1102 sc->r[j].use = TRUE;
1104 else if (overlap < 0.05)
1106 sc->r[j].use = FALSE;
1114 ntot_so_far += ntot_add;
1116 sample_coll_calc_ntot(sc);
1121 /* calculate minimum and maximum work values in sample collection */
1122 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1123 double *Wmin, double *Wmax)
1130 for(i=0;i<sc->nsamples;i++)
1132 samples_t *s=sc->s[i];
1133 sample_range_t *r=&(sc->r[i]);
1138 for(j=r->start; j<r->end; j++)
1140 *Wmin = min(*Wmin,s->du[j]*Wfac);
1141 *Wmax = max(*Wmax,s->du[j]*Wfac);
1146 int hd=0; /* determine the histogram direction: */
1148 if ( (s->hist->nhist>1) && (Wfac<0) )
1154 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1156 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1157 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1158 /* look for the highest value bin with values */
1159 if (s->hist->bin[hd][j]>0)
1161 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1162 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1172 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1181 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1187 /* calculate the BAR average given a histogram
1189 if type== 0, calculate the best estimate for the average,
1190 if type==-1, calculate the minimum possible value given the histogram
1191 if type== 1, calculate the maximum possible value given the histogram */
1192 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1198 /* normalization factor multiplied with bin width and
1199 number of samples (we normalize through M): */
1201 int hd=0; /* determine the histogram direction: */
1204 if ( (hist->nhist>1) && (Wfac<0) )
1209 max=hist->nbin[hd]-1;
1212 max=hist->nbin[hd]; /* we also add whatever was out of range */
1217 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1218 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1220 sum += pxdx/(1. + exp(x + sbMmDG));
1226 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1227 double temp, double tol, int type)
1232 double Wfac1,Wfac2,Wmin,Wmax;
1233 double DG0,DG1,DG2,dDG1;
1235 double n1, n2; /* numbers of samples as doubles */
1240 /* count the numbers of samples */
1246 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1248 /* this is the case when the delta U were calculated directly
1249 (i.e. we're not scaling dhdl) */
1255 /* we're using dhdl, so delta_lambda needs to be a
1256 multiplication factor. */
1257 double delta_lambda=cb->native_lambda-ca->native_lambda;
1258 Wfac1 = beta*delta_lambda;
1259 Wfac2 = -beta*delta_lambda;
1264 /* We print the output both in kT and kJ/mol.
1265 * Here we determine DG in kT, so when beta < 1
1266 * the precision has to be increased.
1271 /* Calculate minimum and maximum work to give an initial estimate of
1272 * delta G as their average.
1275 double Wmin1, Wmin2, Wmax1, Wmax2;
1276 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1277 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1279 Wmin=min(Wmin1, Wmin2);
1280 Wmax=max(Wmax1, Wmax2);
1288 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1290 /* We approximate by bisection: given our initial estimates
1291 we keep checking whether the halfway point is greater or
1292 smaller than what we get out of the BAR averages.
1294 For the comparison we can use twice the tolerance. */
1295 while (DG2 - DG0 > 2*tol)
1297 DG1 = 0.5*(DG0 + DG2);
1299 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1302 /* calculate the BAR averages */
1305 for(i=0;i<ca->nsamples;i++)
1307 samples_t *s=ca->s[i];
1308 sample_range_t *r=&(ca->r[i]);
1313 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1317 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1322 for(i=0;i<cb->nsamples;i++)
1324 samples_t *s=cb->s[i];
1325 sample_range_t *r=&(cb->r[i]);
1330 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1334 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1350 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1354 return 0.5*(DG0 + DG2);
1357 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1358 double temp, double dg, double *sa, double *sb)
1364 double Wfac1, Wfac2;
1370 /* count the numbers of samples */
1374 /* to ensure the work values are the same as during the delta_G */
1375 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1377 /* this is the case when the delta U were calculated directly
1378 (i.e. we're not scaling dhdl) */
1384 /* we're using dhdl, so delta_lambda needs to be a
1385 multiplication factor. */
1386 double delta_lambda=cb->native_lambda-ca->native_lambda;
1387 Wfac1 = beta*delta_lambda;
1388 Wfac2 = -beta*delta_lambda;
1391 /* first calculate the average work in both directions */
1392 for(i=0;i<ca->nsamples;i++)
1394 samples_t *s=ca->s[i];
1395 sample_range_t *r=&(ca->r[i]);
1400 for(j=r->start;j<r->end;j++)
1401 W_ab += Wfac1*s->du[j];
1405 /* normalization factor multiplied with bin width and
1406 number of samples (we normalize through M): */
1409 int hd=0; /* histogram direction */
1410 if ( (s->hist->nhist>1) && (Wfac1<0) )
1416 for(j=0;j<s->hist->nbin[0];j++)
1418 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1419 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1427 for(i=0;i<cb->nsamples;i++)
1429 samples_t *s=cb->s[i];
1430 sample_range_t *r=&(cb->r[i]);
1435 for(j=r->start;j<r->end;j++)
1436 W_ba += Wfac1*s->du[j];
1440 /* normalization factor multiplied with bin width and
1441 number of samples (we normalize through M): */
1444 int hd=0; /* histogram direction */
1445 if ( (s->hist->nhist>1) && (Wfac2<0) )
1451 for(j=0;j<s->hist->nbin[0];j++)
1453 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1454 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1462 /* then calculate the relative entropies */
1467 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1468 double temp, double dg, double *stddev)
1472 double sigmafact=0.;
1474 double Wfac1, Wfac2;
1480 /* count the numbers of samples */
1484 /* to ensure the work values are the same as during the delta_G */
1485 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1487 /* this is the case when the delta U were calculated directly
1488 (i.e. we're not scaling dhdl) */
1494 /* we're using dhdl, so delta_lambda needs to be a
1495 multiplication factor. */
1496 double delta_lambda=cb->native_lambda-ca->native_lambda;
1497 Wfac1 = beta*delta_lambda;
1498 Wfac2 = -beta*delta_lambda;
1504 /* calculate average in both directions */
1505 for(i=0;i<ca->nsamples;i++)
1507 samples_t *s=ca->s[i];
1508 sample_range_t *r=&(ca->r[i]);
1513 for(j=r->start;j<r->end;j++)
1515 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1520 /* normalization factor multiplied with bin width and
1521 number of samples (we normalize through M): */
1524 int hd=0; /* histogram direction */
1525 if ( (s->hist->nhist>1) && (Wfac1<0) )
1531 for(j=0;j<s->hist->nbin[0];j++)
1533 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1534 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1536 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1541 for(i=0;i<cb->nsamples;i++)
1543 samples_t *s=cb->s[i];
1544 sample_range_t *r=&(cb->r[i]);
1549 for(j=r->start;j<r->end;j++)
1551 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1556 /* normalization factor multiplied with bin width and
1557 number of samples (we normalize through M): */
1560 int hd=0; /* histogram direction */
1561 if ( (s->hist->nhist>1) && (Wfac2<0) )
1567 for(j=0;j<s->hist->nbin[0];j++)
1569 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1570 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1572 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1578 sigmafact /= (n1 + n2);
1582 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1583 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1588 static void calc_bar(barres_t *br, double tol,
1589 int npee_min, int npee_max, gmx_bool *bEE,
1593 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1594 for calculated quantities */
1595 int nsample1, nsample2;
1596 double temp=br->a->temp;
1598 double dg_min, dg_max;
1599 gmx_bool have_hist=FALSE;
1601 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1603 br->dg_disc_err = 0.;
1604 br->dg_histrange_err = 0.;
1606 /* check if there are histograms */
1607 for(i=0;i<br->a->nsamples;i++)
1609 if (br->a->r[i].use && br->a->s[i]->hist)
1617 for(i=0;i<br->b->nsamples;i++)
1619 if (br->b->r[i].use && br->b->s[i]->hist)
1627 /* calculate histogram-specific errors */
1630 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1631 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1633 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1635 /* the histogram range error is the biggest of the differences
1636 between the best estimate and the extremes */
1637 br->dg_histrange_err = fabs(dg_max - dg_min);
1639 br->dg_disc_err = 0.;
1640 for(i=0;i<br->a->nsamples;i++)
1642 if (br->a->s[i]->hist)
1643 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1645 for(i=0;i<br->b->nsamples;i++)
1647 if (br->b->s[i]->hist)
1648 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1651 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1653 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1662 sample_coll_t ca, cb;
1664 /* initialize the samples */
1665 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1667 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1670 for(npee=npee_min; npee<=npee_max; npee++)
1679 double dstddev2 = 0;
1682 for(p=0; p<npee; p++)
1689 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1690 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1694 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1697 sample_coll_destroy(&ca);
1699 sample_coll_destroy(&cb);
1703 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1707 partsum[npee*(npee_max+1)+p] += dgp;
1709 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1714 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1717 dstddev2 += stddevc*stddevc;
1719 sample_coll_destroy(&ca);
1720 sample_coll_destroy(&cb);
1724 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1730 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1731 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1735 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1737 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1738 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1739 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1740 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1745 static double bar_err(int nbmin, int nbmax, const double *partsum)
1748 double svar,s,s2,dg;
1751 for(nb=nbmin; nb<=nbmax; nb++)
1757 dg = partsum[nb*(nbmax+1)+b];
1763 svar += (s2 - s*s)/(nb - 1);
1766 return sqrt(svar/(nbmax + 1 - nbmin));
1769 /* deduce lambda value from legend.
1771 bdhdl = if true, value may be a derivative.
1773 bdhdl = whether the legend was for a derivative.
1775 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1783 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1785 ptr = strrchr(legend,' ');
1787 if (strstr(legend,"dH"))
1800 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1813 printf("%s\n", legend);
1814 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1816 if (sscanf(ptr,"%lf",&lambda) != 1)
1818 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1824 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1831 /* plain text lambda string */
1832 ptr = strstr(subtitle,"lambda");
1835 /* xmgrace formatted lambda string */
1836 ptr = strstr(subtitle,"\\xl\\f{}");
1840 /* xmgr formatted lambda string */
1841 ptr = strstr(subtitle,"\\8l\\4");
1845 ptr = strstr(ptr,"=");
1849 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1855 static double filename2lambda(char *fn)
1858 char *ptr,*endptr,*digitptr;
1861 /* go to the end of the path string and search backward to find the last
1862 directory in the path which has to contain the value of lambda
1864 while (ptr[1] != '\0')
1868 /* searching backward to find the second directory separator */
1873 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1875 if (dirsep == 1) break;
1878 /* save the last position of a digit between the last two
1879 separators = in the last dirname */
1880 if (dirsep > 0 && isdigit(*ptr))
1888 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1889 " last directory in the path '%s' does not contain a number",fn);
1891 if (digitptr[-1] == '-')
1895 lambda = strtod(digitptr,&endptr);
1896 if (endptr == digitptr)
1898 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1904 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1907 char *subtitle,**legend,*ptr;
1909 gmx_bool native_lambda_read=FALSE;
1915 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1918 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1922 snew(ba->np,ba->nset);
1923 for(i=0;i<ba->nset;i++)
1928 if (subtitle != NULL)
1930 ptr = strstr(subtitle,"T =");
1934 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1938 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1948 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1953 /* Try to deduce lambda from the subtitle */
1956 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1958 native_lambda_read=TRUE;
1961 snew(ba->lambda,ba->nset-1);
1964 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1967 if (!native_lambda_read)
1969 /* Deduce lambda from the file name */
1970 ba->native_lambda = filename2lambda(fn);
1971 native_lambda_read=TRUE;
1973 ba->lambda[0] = ba->native_lambda;
1977 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1982 for(i=0; i<ba->nset-1; i++)
1984 gmx_bool is_dhdl=(i==0);
1985 /* Read lambda from the legend */
1986 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
1988 if (is_dhdl && !native_lambda_read)
1990 ba->native_lambda = ba->lambda[i];
1991 native_lambda_read=TRUE;
1996 if (!native_lambda_read)
1998 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2001 /* Reorder the data */
2002 for(i=1; i<ba->nset; i++)
2004 ba->y[i-1] = ba->y[i];
2008 for(i=0; i<ba->nset-1; i++)
2017 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2026 read_bar_xvg_lowlevel(fn, temp, barsim);
2028 if (barsim->nset <1 )
2030 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2033 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2035 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2039 /* now create a series of samples_t */
2040 snew(s, barsim->nset);
2041 for(i=0;i<barsim->nset;i++)
2043 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2044 barsim->temp, lambda_same(barsim->native_lambda,
2047 s[i].du=barsim->y[i];
2048 s[i].ndu=barsim->np[i];
2051 lambda_list_insert_sample(lambda_head, s+i);
2053 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2054 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2055 for(i=0;i<barsim->nset;i++)
2057 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2062 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2063 double start_time, double delta_time,
2064 double native_lambda, double temp,
2065 double *last_t, const char *filename)
2069 double foreign_lambda;
2071 samples_t *s; /* convenience pointer */
2074 /* check the block types etc. */
2075 if ( (blk->nsub < 3) ||
2076 (blk->sub[0].type != xdr_datatype_int) ||
2077 (blk->sub[1].type != xdr_datatype_double) ||
2079 (blk->sub[2].type != xdr_datatype_float) &&
2080 (blk->sub[2].type != xdr_datatype_double)
2082 (blk->sub[0].nr < 1) ||
2083 (blk->sub[1].nr < 1) )
2086 "Unexpected/corrupted block data in file %s around time %g.",
2087 filename, start_time);
2090 derivative = blk->sub[0].ival[0];
2091 foreign_lambda = blk->sub[1].dval[0];
2095 /* initialize the samples structure if it's empty. */
2097 samples_init(*smp, native_lambda, foreign_lambda, temp,
2098 derivative!=0, filename);
2099 (*smp)->start_time=start_time;
2100 (*smp)->delta_time=delta_time;
2103 /* set convenience pointer */
2106 /* now double check */
2107 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2108 ( (derivative!=0) != (s->derivative!=0) ) )
2110 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2111 foreign_lambda, s->foreign_lambda);
2112 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2113 derivative, s->derivative);
2114 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2115 filename, start_time);
2118 /* make room for the data */
2119 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2121 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2122 blk->sub[2].nr*2 : s->ndu_alloc;
2123 srenew(s->du_alloc, s->ndu_alloc);
2127 s->ndu += blk->sub[2].nr;
2128 s->ntot += blk->sub[2].nr;
2129 *ndu = blk->sub[2].nr;
2131 /* and copy the data*/
2132 for(j=0;j<blk->sub[2].nr;j++)
2134 if (blk->sub[2].type == xdr_datatype_float)
2136 s->du[startj+j] = blk->sub[2].fval[j];
2140 s->du[startj+j] = blk->sub[2].dval[j];
2143 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2145 *last_t = start_time + blk->sub[2].nr*delta_time;
2149 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2150 double start_time, double delta_time,
2151 double native_lambda, double temp,
2152 double *last_t, const char *filename)
2157 double foreign_lambda;
2161 /* check the block types etc. */
2162 if ( (blk->nsub < 2) ||
2163 (blk->sub[0].type != xdr_datatype_double) ||
2164 (blk->sub[1].type != xdr_datatype_large_int) ||
2165 (blk->sub[0].nr < 2) ||
2166 (blk->sub[1].nr < 2) )
2169 "Unexpected/corrupted block data in file %s around time %g",
2170 filename, start_time);
2181 "Unexpected/corrupted block data in file %s around time %g",
2182 filename, start_time);
2188 foreign_lambda=blk->sub[0].dval[0];
2189 derivative=(int)(blk->sub[1].lval[1]);
2191 foreign_lambda=native_lambda;
2193 samples_init(s, native_lambda, foreign_lambda, temp,
2194 derivative!=0, filename);
2197 for(i=0;i<nhist;i++)
2199 nbins[i] = blk->sub[i+2].nr;
2202 hist_init(s->hist, nhist, nbins);
2204 for(i=0;i<nhist;i++)
2206 s->hist->x0[i]=blk->sub[1].lval[2+i];
2207 s->hist->dx[i] = blk->sub[0].dval[1];
2209 s->hist->dx[i] = - s->hist->dx[i];
2212 s->hist->start_time = start_time;
2213 s->hist->delta_time = delta_time;
2214 s->start_time = start_time;
2215 s->delta_time = delta_time;
2217 for(i=0;i<nhist;i++)
2220 gmx_large_int_t sum=0;
2222 for(j=0;j<s->hist->nbin[i];j++)
2224 int binv=(int)(blk->sub[i+2].ival[j]);
2226 s->hist->bin[i][j] = binv;
2239 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2245 if (start_time + s->hist->sum*delta_time > *last_t)
2247 *last_t = start_time + s->hist->sum*delta_time;
2253 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2259 gmx_enxnm_t *enm=NULL;
2262 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2263 int *nhists=NULL; /* array to keep count & print at end */
2264 int *npts=NULL; /* array to keep count & print at end */
2265 double *lambdas=NULL; /* array to keep count & print at end */
2266 double native_lambda=-1;
2267 double end_time; /* the end time of the last batch of samples */
2270 fp = open_enx(fn,"r");
2271 do_enxnms(fp,&nre,&enm);
2274 while(do_enx(fp, fr))
2276 /* count the data blocks */
2281 /* DHCOLL block information: */
2282 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2285 /* count the blocks and handle collection information: */
2286 for(i=0;i<fr->nblock;i++)
2288 if (fr->block[i].id == enxDHHIST )
2290 if ( fr->block[i].id == enxDH )
2292 if (fr->block[i].id == enxDHCOLL )
2295 if ( (fr->block[i].nsub < 1) ||
2296 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2297 (fr->block[i].sub[0].nr < 5))
2299 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2302 /* read the data from the DHCOLL block */
2303 rtemp = fr->block[i].sub[0].dval[0];
2304 start_time = fr->block[i].sub[0].dval[1];
2305 delta_time = fr->block[i].sub[0].dval[2];
2306 start_lambda = fr->block[i].sub[0].dval[3];
2307 delta_lambda = fr->block[i].sub[0].dval[4];
2311 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2313 if ( ( *temp != rtemp) && (*temp > 0) )
2315 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2326 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2328 if (nblocks_raw > 0 && nblocks_hist > 0 )
2330 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2335 /* check the native lambda */
2336 if (!lambda_same(start_lambda, native_lambda) )
2338 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2339 fn, native_lambda, start_lambda, start_time);
2341 /* check the number of samples against the previous number */
2342 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2344 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2345 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2347 /* check whether last iterations's end time matches with
2348 the currrent start time */
2349 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2351 /* it didn't. We need to store our samples and reallocate */
2352 for(i=0;i<nsamples;i++)
2354 if (samples_rawdh[i])
2356 /* insert it into the existing list */
2357 lambda_list_insert_sample(lambda_head,
2359 /* and make sure we'll allocate a new one this time
2361 samples_rawdh[i]=NULL;
2368 /* this is the first round; allocate the associated data
2370 native_lambda=start_lambda;
2371 nsamples=nblocks_raw+nblocks_hist;
2372 snew(nhists, nsamples);
2373 snew(npts, nsamples);
2374 snew(lambdas, nsamples);
2375 snew(samples_rawdh, nsamples);
2376 for(i=0;i<nsamples;i++)
2381 samples_rawdh[i]=NULL; /* init to NULL so we know which
2382 ones contain values */
2387 k=0; /* counter for the lambdas, etc. arrays */
2388 for(i=0;i<fr->nblock;i++)
2390 if (fr->block[i].id == enxDH)
2393 read_edr_rawdh_block(&(samples_rawdh[k]),
2396 start_time, delta_time,
2397 start_lambda, rtemp,
2400 if (samples_rawdh[k])
2402 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2406 else if (fr->block[i].id == enxDHHIST)
2410 samples_t *s; /* this is where the data will go */
2411 s=read_edr_hist_block(&nb, &(fr->block[i]),
2412 start_time, delta_time,
2413 start_lambda, rtemp,
2418 lambdas[k]= s->foreign_lambda;
2421 /* and insert the new sample immediately */
2424 lambda_list_insert_sample(lambda_head, s+j);
2429 /* Now store all our extant sample collections */
2430 for(i=0;i<nsamples;i++)
2432 if (samples_rawdh[i])
2434 /* insert it into the existing list */
2435 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2440 fprintf(stderr, "\n");
2441 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2442 fn, first_t, last_t, native_lambda);
2443 for(i=0;i<nsamples;i++)
2447 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2451 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2461 int gmx_bar(int argc,char *argv[])
2463 static const char *desc[] = {
2464 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2465 "Bennett's acceptance ratio method (BAR). It also automatically",
2466 "adds series of individual free energies obtained with BAR into",
2467 "a combined free energy estimate.[PAR]",
2469 "Every individual BAR free energy difference relies on two ",
2470 "simulations at different states: say state A and state B, as",
2471 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
2472 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2473 "average of the Hamiltonian difference of state B given state A and",
2474 "vice versa. If the Hamiltonian does not depend linearly on [GRK]lambda[grk]",
2475 "(in which case we can extrapolate the derivative of the Hamiltonian",
2476 "with respect to [GRK]lambda[grk], as is the default when [TT]free_energy[tt] is on),",
2477 "the energy differences to the other state need to be calculated",
2478 "explicitly during the simulation. This can be controlled with",
2479 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
2481 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
2482 "Two types of input files are supported:[BR]",
2483 "[TT]*[tt] Files with only one [IT]y[it]-value, for such files it is assumed ",
2484 " that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the Hamiltonian depends ",
2485 " linearly on [GRK]lambda[grk]. The [GRK]lambda[grk] value of the simulation is inferred ",
2486 " from the subtitle (if present), otherwise from a number in the",
2487 " subdirectory in the file name.",
2489 "[TT]*[tt] Files with more than one [IT]y[it]-value. The files should have columns ",
2490 " with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. The [GRK]lambda[grk] values are inferred ",
2491 " from the legends: [GRK]lambda[grk] of the simulation from the legend of dH/d[GRK]lambda[grk] ",
2492 " and the foreign [GRK]lambda[grk] values from the legends of Delta H.[PAR]",
2493 "The [GRK]lambda[grk] of the simulation is parsed from [TT]dhdl.xvg[tt] file's legend ",
2494 "containing the string 'dH', the foreign [GRK]lambda[grk] values from the legend ",
2495 "containing the capitalized letters 'D' and 'H'. The temperature ",
2496 "is parsed from the legend line containing 'T ='.[PAR]",
2498 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
2499 "These can contain either lists of energy differences (see the",
2500 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of histograms",
2501 "(see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and [TT]dh_hist_spacing[tt]).",
2502 "The temperature and [GRK]lambda[grk] values are automatically deduced from",
2503 "the [TT]ener.edr[tt] file.[PAR]"
2505 "The free energy estimates are determined using BAR with bisection, ",
2506 "with the precision of the output set with [TT]-prec[tt]. ",
2507 "An error estimate taking into account time correlations ",
2508 "is made by splitting the data into blocks and determining ",
2509 "the free energy differences over those blocks and assuming ",
2510 "the blocks are independent. ",
2511 "The final error estimate is determined from the average variance ",
2512 "over 5 blocks. A range of block numbers for error estimation can ",
2513 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2515 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and 'foreign'",
2516 "[GRK]lambda[grk] values, but always assumes independent samples. [BB]Note[bb] that",
2517 "when aggregating energy differences/derivatives with different",
2518 "sampling intervals, this is almost certainly not correct. Usually",
2519 "subsequent energies are correlated and different time intervals mean",
2520 "different degrees of correlation between samples.[PAR]",
2522 "The results are split in two parts: the last part contains the final ",
2523 "results in kJ/mol, together with the error estimate for each part ",
2524 "and the total. The first part contains detailed free energy ",
2525 "difference estimates and phase space overlap measures in units of ",
2526 "kT (together with their computed error estimate). The printed ",
2528 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
2529 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
2530 "[TT]*[tt] DG: the free energy estimate.[BR]",
2531 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2532 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2533 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2535 "The relative entropy of both states in each other's ensemble can be ",
2536 "interpreted as a measure of phase space overlap: ",
2537 "the relative entropy s_A of the work samples of lambda_B in the ",
2538 "ensemble of lambda_A (and vice versa for s_B), is a ",
2539 "measure of the 'distance' between Boltzmann distributions of ",
2540 "the two states, that goes to zero for identical distributions. See ",
2541 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
2543 "The estimate of the expected per-sample standard deviation, as given ",
2544 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2545 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2546 "of the actual statistical error, because it assumes independent samples).[PAR]",
2548 "To get a visual estimate of the phase space overlap, use the ",
2549 "[TT]-oh[tt] option to write series of histograms, together with the ",
2550 "[TT]-nbin[tt] option.[PAR]"
2552 static real begin=0,end=-1,temp=-1;
2553 int nd=2,nbmin=5,nbmax=5;
2555 gmx_bool calc_s,calc_v;
2557 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2558 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2559 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2560 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2561 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2562 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2563 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"}
2567 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2568 { efEDR, "-g", "ener", ffOPTRDMULT },
2569 { efXVG, "-o", "bar", ffOPTWR },
2570 { efXVG, "-oi", "barint", ffOPTWR },
2571 { efXVG, "-oh", "histogram", ffOPTWR }
2573 #define NFILE asize(fnm)
2576 int nf=0; /* file counter */
2578 int nfile_tot; /* total number of input files */
2583 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2584 lambda_t lambda_head; /* the head element */
2585 barres_t *results; /* the results */
2586 int nresults; /* number of results in results array */
2589 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2592 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2593 char ktformat[STRLEN], sktformat[STRLEN];
2594 char kteformat[STRLEN], skteformat[STRLEN];
2597 gmx_bool result_OK=TRUE,bEE=TRUE;
2599 gmx_bool disc_err=FALSE;
2600 double sum_disc_err=0.; /* discretization error */
2601 gmx_bool histrange_err=FALSE;
2602 double sum_histrange_err=0.; /* histogram range error */
2603 double stat_err=0.; /* statistical error */
2605 CopyRight(stderr,argv[0]);
2606 parse_common_args(&argc,argv,
2608 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2610 if (opt2bSet("-f", NFILE, fnm))
2612 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2614 if (opt2bSet("-g", NFILE, fnm))
2616 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2619 /* make linked list */
2621 lambda_init(lb, 0, 0);
2626 nfile_tot = nxvgfile + nedrfile;
2630 gmx_fatal(FARGS,"No input files!");
2635 gmx_fatal(FARGS,"Can not have negative number of digits");
2639 snew(partsum,(nbmax+1)*(nbmax+1));
2642 /* read in all files. First xvg files */
2643 for(f=0; f<nxvgfile; f++)
2645 read_bar_xvg(fxvgnms[f],&temp,lb);
2648 /* then .edr files */
2649 for(f=0; f<nedrfile; f++)
2651 read_barsim_edr(fedrnms[f],&temp,lb);;
2655 /* fix the times to allow for equilibration */
2656 lambdas_impose_times(lb, begin, end);
2658 if (opt2bSet("-oh",NFILE,fnm))
2660 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2663 /* assemble the output structures from the lambdas */
2664 results=barres_list_create(lb, &nresults);
2666 sum_disc_err=barres_list_max_disc_err(results, nresults);
2670 printf("\nNo results to calculate.\n");
2675 if (sum_disc_err > prec)
2678 nd = ceil(-log10(prec));
2679 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2683 sprintf(lamformat,"%%6.3f");
2684 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2685 /* the format strings of the results in kT */
2686 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2687 sprintf( sktformat,"%%%ds",6+nd);
2688 /* the format strings of the errors in kT */
2689 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2690 sprintf( skteformat,"%%%ds",4+nd);
2691 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2692 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2697 if (opt2bSet("-o",NFILE,fnm))
2699 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2700 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2701 "\\lambda",buf,exvggtXYDY,oenv);
2705 if (opt2bSet("-oi",NFILE,fnm))
2707 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2708 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2709 "\\lambda",buf,oenv);
2717 /* first calculate results */
2720 for(f=0; f<nresults; f++)
2722 /* Determine the free energy difference with a factor of 10
2723 * more accuracy than requested for printing.
2725 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2728 if (results[f].dg_disc_err > prec/10.)
2730 if (results[f].dg_histrange_err > prec/10.)
2734 /* print results in kT */
2738 printf("\nTemperature: %g K\n", temp);
2740 printf("\nDetailed results in kT (see help for explanation):\n\n");
2741 printf("%6s ", " lam_A");
2742 printf("%6s ", " lam_B");
2743 printf(sktformat, "DG ");
2745 printf(skteformat, "+/- ");
2747 printf(skteformat, "disc ");
2749 printf(skteformat, "range ");
2750 printf(sktformat, "s_A ");
2752 printf(skteformat, "+/- " );
2753 printf(sktformat, "s_B ");
2755 printf(skteformat, "+/- " );
2756 printf(sktformat, "stdev ");
2758 printf(skteformat, "+/- ");
2760 for(f=0; f<nresults; f++)
2762 printf(lamformat, results[f].a->native_lambda);
2764 printf(lamformat, results[f].b->native_lambda);
2766 printf(ktformat, results[f].dg);
2770 printf(kteformat, results[f].dg_err);
2775 printf(kteformat, results[f].dg_disc_err);
2780 printf(kteformat, results[f].dg_histrange_err);
2783 printf(ktformat, results[f].sa);
2787 printf(kteformat, results[f].sa_err);
2790 printf(ktformat, results[f].sb);
2794 printf(kteformat, results[f].sb_err);
2797 printf(ktformat, results[f].dg_stddev);
2801 printf(kteformat, results[f].dg_stddev_err);
2805 /* Check for negative relative entropy with a 95% certainty. */
2806 if (results[f].sa < -2*results[f].sa_err ||
2807 results[f].sb < -2*results[f].sb_err)
2815 printf("\nWARNING: Some of these results violate the Second Law of "
2816 "Thermodynamics: \n"
2817 " This is can be the result of severe undersampling, or "
2819 " there is something wrong with the simulations.\n");
2823 /* final results in kJ/mol */
2824 printf("\n\nFinal results in kJ/mol:\n\n");
2826 for(f=0; f<nresults; f++)
2831 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2837 fprintf(fpb, xvg3format,
2838 0.5*(results[f].a->native_lambda +
2839 results[f].b->native_lambda),
2840 results[f].dg,results[f].dg_err);
2843 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2844 results[f].lambda_b);*/
2846 printf(lamformat, results[f].a->native_lambda);
2848 printf(lamformat, results[f].b->native_lambda);
2851 printf(dgformat,results[f].dg*kT);
2855 printf(dgformat,results[f].dg_err*kT);
2859 printf(" (max. range err. = ");
2860 printf(dgformat,results[f].dg_histrange_err*kT);
2862 sum_histrange_err += results[f].dg_histrange_err*kT;
2866 dg_tot += results[f].dg;
2870 printf(lamformat, results[0].a->native_lambda);
2872 printf(lamformat, results[nresults-1].b->native_lambda);
2875 printf(dgformat,dg_tot*kT);
2878 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2880 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2885 printf("\nmaximum discretization error = ");
2886 printf(dgformat,sum_disc_err);
2887 if (bEE && stat_err < sum_disc_err)
2889 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2894 printf("\nmaximum histogram range error = ");
2895 printf(dgformat,sum_histrange_err);
2896 if (bEE && stat_err < sum_histrange_err)
2898 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2907 fprintf(fpi, xvg2format,
2908 results[nresults-1].b->native_lambda, dg_tot);
2912 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2913 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");