1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
54 #include "gmx_fatal.h"
60 /* the dhdl.xvg data from a simulation (actually obsolete, but still
61 here for reading the dhdl.xvg file*/
65 int ftp; /* file type */
66 int nset; /* number of lambdas, including dhdl */
67 int *np; /* number of data points (du or hists) per lambda */
68 int np_alloc; /* number of points (du or hists) allocated */
69 double temp; /* temperature */
70 double *lambda; /* the lambdas (of first index for y). */
71 double *t; /* the times (of second index for y) */
72 double **y; /* the dU values. y[0] holds the derivative, while
73 further ones contain the energy differences between
74 the native lambda and the 'foreign' lambdas. */
76 double native_lambda; /* the native lambda */
78 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
85 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
86 double dx[2]; /* the histogram spacing. The reverse
87 dx is the negative of the forward dx.*/
88 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
91 int nbin[2]; /* the (forward+reverse) number of bins */
92 gmx_large_int_t sum; /* the total number of counts. Must be
93 the same for forward + reverse. */
94 int nhist; /* number of hist datas (forward or reverse) */
96 double start_time, delta_time; /* start time, end time of histogram */
100 /* an aggregate of samples for partial free energy calculation */
101 typedef struct samples_t
103 double native_lambda;
104 double foreign_lambda;
105 double temp; /* the temperature */
106 gmx_bool derivative; /* whether this sample is a derivative */
108 /* The samples come either as either delta U lists: */
109 int ndu; /* the number of delta U samples */
110 double *du; /* the delta u's */
111 double *t; /* the times associated with those samples, or: */
112 double start_time,delta_time;/*start time and delta time for linear time*/
114 /* or as histograms: */
115 hist_t *hist; /* a histogram */
117 /* allocation data: (not NULL for data 'owned' by this struct) */
118 double *du_alloc, *t_alloc; /* allocated delta u arrays */
119 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
120 hist_t *hist_alloc; /* allocated hist */
122 gmx_large_int_t ntot; /* total number of samples */
123 const char *filename; /* the file name this sample comes from */
126 /* a sample range (start to end for du-style data, or boolean
127 for both du-style data and histograms */
128 typedef struct sample_range_t
130 int start, end; /* start and end index for du style data */
131 gmx_bool use; /* whether to use this sample */
133 samples_t *s; /* the samples this range belongs to */
137 /* a collection of samples for a partial free energy calculation
138 (i.e. the collection of samples from one native lambda to one
140 typedef struct sample_coll_t
142 double native_lambda; /* these should be the same for all samples in the */
143 double foreign_lambda; /* collection */
144 double temp; /* the temperature */
146 int nsamples; /* the number of samples */
147 samples_t **s; /* the samples themselves */
148 sample_range_t *r; /* the sample ranges */
149 int nsamples_alloc; /* number of allocated samples */
151 gmx_large_int_t ntot; /* total number of samples in the ranges of
154 struct sample_coll_t *next, *prev; /* next and previous in the list */
157 /* all the samples associated with a lambda point */
158 typedef struct lambda_t
160 double lambda; /* the native lambda (at start time if dynamic) */
161 double temp; /* temperature */
163 sample_coll_t *sc; /* the samples */
165 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
167 struct lambda_t *next, *prev; /* the next and prev in the list */
171 /* calculated values. */
173 sample_coll_t *a, *b; /* the simulation data */
175 double dg; /* the free energy difference */
176 double dg_err; /* the free energy difference */
178 double dg_disc_err; /* discretization error */
179 double dg_histrange_err; /* histogram range error */
181 double sa; /* relative entropy of b in state a */
182 double sa_err; /* error in sa */
183 double sb; /* relative entropy of a in state b */
184 double sb_err; /* error in sb */
186 double dg_stddev; /* expected dg stddev per sample */
187 double dg_stddev_err; /* error in dg_stddev */
193 static void hist_init(hist_t *h, int nhist, int *nbin)
198 gmx_fatal(FARGS, "histogram with more than two sets of data!");
202 snew(h->bin[i], nbin[i]);
205 h->start_time=h->delta_time=0;
212 static void hist_destroy(hist_t *h)
218 static void xvg_init(xvg_t *ba)
227 static void samples_init(samples_t *s, double native_lambda,
228 double foreign_lambda, double temp,
229 gmx_bool derivative, const char *filename)
231 s->native_lambda=native_lambda;
232 s->foreign_lambda=foreign_lambda;
234 s->derivative=derivative;
239 s->start_time = s->delta_time = 0;
248 s->filename=filename;
251 /* destroy the data structures directly associated with the structure, not
252 the data it points to */
253 static void samples_destroy(samples_t *s)
265 hist_destroy(s->hist_alloc);
266 sfree(s->hist_alloc);
270 static void sample_range_init(sample_range_t *r, samples_t *s)
278 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
279 double foreign_lambda, double temp)
281 sc->native_lambda = native_lambda;
282 sc->foreign_lambda = foreign_lambda;
288 sc->nsamples_alloc=0;
291 sc->next=sc->prev=NULL;
294 static void sample_coll_destroy(sample_coll_t *sc)
296 /* don't free the samples themselves */
302 static void lambda_init(lambda_t *l, double native_lambda, double temp)
304 l->lambda=native_lambda;
312 sample_coll_init(l->sc, native_lambda, 0., 0.);
317 static void barres_init(barres_t *br)
335 static gmx_bool lambda_same(double lambda1, double lambda2)
337 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
340 /* calculate the total number of samples in a sample collection */
341 static void sample_coll_calc_ntot(sample_coll_t *sc)
346 for(i=0;i<sc->nsamples;i++)
352 sc->ntot += sc->s[i]->ntot;
356 sc->ntot += sc->r[i].end - sc->r[i].start;
363 /* find the barsamples_t associated with a lambda that corresponds to
364 a specific foreign lambda */
365 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
366 double foreign_lambda)
368 sample_coll_t *sc=l->sc->next;
372 if (lambda_same(sc->foreign_lambda,foreign_lambda))
382 /* insert li into an ordered list of lambda_colls */
383 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
385 sample_coll_t *scn=l->sc->next;
386 while ( (scn!=l->sc) )
388 if (scn->foreign_lambda > sc->foreign_lambda)
392 /* now insert it before the found scn */
399 /* insert li into an ordered list of lambdas */
400 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
402 lambda_t *lc=head->next;
405 if (lc->lambda > li->lambda)
409 /* now insert ourselves before the found lc */
416 /* insert a sample and a sample_range into a sample_coll. The
417 samples are stored as a pointer, the range is copied. */
418 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
421 /* first check if it belongs here */
422 if (sc->temp != s->temp)
424 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
425 s->filename, sc->next->s[0]->filename);
427 if (sc->native_lambda != s->native_lambda)
429 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
430 s->filename, sc->next->s[0]->filename);
432 if (sc->foreign_lambda != s->foreign_lambda)
434 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
435 s->filename, sc->next->s[0]->filename);
438 /* check if there's room */
439 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
441 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
442 srenew(sc->s, sc->nsamples_alloc);
443 srenew(sc->r, sc->nsamples_alloc);
445 sc->s[sc->nsamples]=s;
446 sc->r[sc->nsamples]=*r;
449 sample_coll_calc_ntot(sc);
452 /* insert a sample into a lambda_list, creating the right sample_coll if
454 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
456 gmx_bool found=FALSE;
460 lambda_t *l=head->next;
462 /* first search for the right lambda_t */
465 if (lambda_same(l->lambda, s->native_lambda) )
475 snew(l, 1); /* allocate a new one */
476 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
477 lambda_insert_lambda(head, l); /* add it to the list */
480 /* now look for a sample collection */
481 sc=lambda_find_sample_coll(l, s->foreign_lambda);
484 snew(sc, 1); /* allocate a new one */
485 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
486 lambda_insert_sample_coll(l, sc);
489 /* now insert the samples into the sample coll */
490 sample_range_init(&r, s);
491 sample_coll_insert_sample(sc, s, &r);
495 /* make a histogram out of a sample collection */
496 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
497 int *nbin_alloc, int *nbin,
498 double *dx, double *xmin, int nbin_default)
501 gmx_bool dx_set=FALSE;
502 gmx_bool xmin_set=FALSE;
504 gmx_bool xmax_set=FALSE;
505 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
506 limits of a histogram */
509 /* first determine dx and xmin; try the histograms */
510 for(i=0;i<sc->nsamples;i++)
514 hist_t *hist=sc->s[i]->hist;
515 for(k=0;k<hist->nhist;k++)
517 double hdx=hist->dx[k];
518 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
520 /* we use the biggest dx*/
521 if ( (!dx_set) || hist->dx[0] > *dx)
526 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
529 *xmin = (hist->x0[k]*hdx);
532 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
536 if (hist->bin[k][hist->nbin[k]-1] != 0)
539 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
547 /* and the delta us */
548 for(i=0;i<sc->nsamples;i++)
550 if (sc->s[i]->ndu > 0)
552 /* determine min and max */
553 int starti=sc->r[i].start;
554 int endi=sc->r[i].end;
555 double du_xmin=sc->s[i]->du[starti];
556 double du_xmax=sc->s[i]->du[starti];
557 for(j=starti+1;j<endi;j++)
559 if (sc->s[i]->du[j] < du_xmin)
560 du_xmin = sc->s[i]->du[j];
561 if (sc->s[i]->du[j] > du_xmax)
562 du_xmax = sc->s[i]->du[j];
565 /* and now change the limits */
566 if ( (!xmin_set) || (du_xmin < *xmin) )
571 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
579 if (!xmax_set || !xmin_set)
589 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
590 be 0, and we count from 0 */
594 *nbin=(xmax-(*xmin))/(*dx);
597 if (*nbin > *nbin_alloc)
600 srenew(*bin, *nbin_alloc);
603 /* reset the histogram */
604 for(i=0;i<(*nbin);i++)
609 /* now add the actual data */
610 for(i=0;i<sc->nsamples;i++)
614 hist_t *hist=sc->s[i]->hist;
615 for(k=0;k<hist->nhist;k++)
617 double hdx = hist->dx[k];
618 double xmin_hist=hist->x0[k]*hdx;
619 for(j=0;j<hist->nbin[k];j++)
621 /* calculate the bin corresponding to the middle of the
623 double x=hdx*(j+0.5) + xmin_hist;
624 int binnr=(int)((x-(*xmin))/(*dx));
626 if (binnr >= *nbin || binnr<0)
629 (*bin)[binnr] += hist->bin[k][j];
635 int starti=sc->r[i].start;
636 int endi=sc->r[i].end;
637 for(j=starti;j<endi;j++)
639 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
640 if (binnr >= *nbin || binnr<0)
649 /* write a collection of histograms to a file */
650 void lambdas_histogram(lambda_t *bl_head, const char *filename,
651 int nbin_default, const output_env_t oenv)
653 char label_x[STRLEN];
654 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
655 const char *title="N(\\DeltaH)";
656 const char *label_y="Samples";
660 char **setnames=NULL;
661 gmx_bool first_set=FALSE;
662 /* histogram data: */
670 printf("\nWriting histogram to %s\n", filename);
671 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
673 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
675 /* first get all the set names */
677 /* iterate over all lambdas */
680 sample_coll_t *sc=bl->sc->next;
682 /* iterate over all samples */
686 srenew(setnames, nsets);
687 snew(setnames[nsets-1], STRLEN);
688 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
690 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
691 deltag, lambda, sc->foreign_lambda, lambda,
696 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
697 dhdl, lambda, sc->native_lambda);
704 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
707 /* now make the histograms */
709 /* iterate over all lambdas */
712 sample_coll_t *sc=bl->sc->next;
714 /* iterate over all samples */
719 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
722 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
727 double xmin=i*dx + min;
728 double xmax=(i+1)*dx + min;
730 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
746 /* create a collection (array) of barres_t object given a ordered linked list
747 of barlamda_t sample collections */
748 static barres_t *barres_list_create(lambda_t *bl_head, int *nres)
757 /* first count the lambdas */
764 snew(res, nlambda-1);
766 /* next put the right samples in the res */
768 bl=bl_head->next->next; /* we start with the second one. */
771 sample_coll_t *sc, *scprev;
772 barres_t *br=&(res[*nres]);
773 /* there is always a previous one. we search for that as a foreign
775 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
776 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
784 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
785 sc=lambda_find_sample_coll(bl, bl->lambda);
789 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
794 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
801 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->lambda,bl->prev->lambda);
805 gmx_fatal(FARGS,"Could not find a set for lambda = %g in the files for lambda = %g",bl->prev->lambda,bl->lambda);
817 /* estimate the maximum discretization error */
818 static double barres_list_max_disc_err(barres_t *res, int nres)
826 barres_t *br=&(res[i]);
828 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
830 for(j=0;j<br->a->nsamples;j++)
832 if (br->a->s[j]->hist)
835 if (br->a->s[j]->derivative)
838 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
841 for(j=0;j<br->b->nsamples;j++)
843 if (br->b->s[j]->hist)
846 if (br->b->s[j]->derivative)
848 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
856 /* impose start and end times on a sample collection, updating sample_ranges */
857 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
861 for(i=0;i<sc->nsamples;i++)
863 samples_t *s=sc->s[i];
864 sample_range_t *r=&(sc->r[i]);
867 double end_time=s->hist->delta_time*s->hist->sum +
869 if (s->hist->start_time < begin_t || end_time > end_t)
879 if (s->start_time < begin_t)
881 r->start=(int)((begin_t - s->start_time)/s->delta_time);
883 end_time=s->delta_time*s->ndu + s->start_time;
884 if (end_time > end_t)
886 r->end=(int)((end_t - s->start_time)/s->delta_time);
892 for(j=0;j<s->ndu;j++)
894 if (s->t[j] <begin_t)
899 if (s->t[j] >= end_t)
906 if (r->start > r->end)
912 sample_coll_calc_ntot(sc);
915 static void lambdas_impose_times(lambda_t *head, double begin, double end)
917 double first_t, last_t;
918 double begin_t, end_t;
922 if (begin<=0 && end<0)
927 /* first determine the global start and end times */
933 sample_coll_t *sc=lc->sc->next;
936 for(j=0;j<sc->nsamples;j++)
938 double start_t,end_t;
940 start_t = sc->s[j]->start_time;
941 end_t = sc->s[j]->start_time;
944 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
950 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
954 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
958 if (start_t < first_t || first_t<0)
972 /* calculate the actual times */
990 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
996 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
998 /* then impose them */
1002 sample_coll_t *sc=lc->sc->next;
1005 sample_coll_impose_times(sc, begin_t, end_t);
1013 /* create subsample i out of ni from an existing sample_coll */
1014 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1015 sample_coll_t *sc_orig,
1019 int hist_start, hist_end;
1021 gmx_large_int_t ntot_start;
1022 gmx_large_int_t ntot_end;
1023 gmx_large_int_t ntot_so_far;
1025 *sc = *sc_orig; /* just copy all fields */
1027 /* allocate proprietary memory */
1028 snew(sc->s, sc_orig->nsamples);
1029 snew(sc->r, sc_orig->nsamples);
1031 /* copy the samples */
1032 for(j=0;j<sc_orig->nsamples;j++)
1034 sc->s[j] = sc_orig->s[j];
1035 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1038 /* now fix start and end fields */
1039 /* the casts avoid possible overflows */
1040 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1041 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1043 for(j=0;j<sc->nsamples;j++)
1045 gmx_large_int_t ntot_add;
1046 gmx_large_int_t new_start, new_end;
1052 ntot_add = sc->s[j]->hist->sum;
1056 ntot_add = sc->r[j].end - sc->r[j].start;
1064 if (!sc->s[j]->hist)
1066 if (ntot_so_far < ntot_start)
1068 /* adjust starting point */
1069 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1073 new_start = sc->r[j].start;
1075 /* adjust end point */
1076 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1077 if (new_end > sc->r[j].end)
1079 new_end=sc->r[j].end;
1082 /* check if we're in range at all */
1083 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1088 /* and write the new range */
1089 sc->r[j].start=(int)new_start;
1090 sc->r[j].end=(int)new_end;
1097 double ntot_start_norm, ntot_end_norm;
1098 /* calculate the amount of overlap of the
1099 desired range (ntot_start -- ntot_end) onto
1100 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1102 /* first calculate normalized bounds
1103 (where 0 is the start of the hist range, and 1 the end) */
1104 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1105 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1107 /* now fix the boundaries */
1108 ntot_start_norm = min(1, max(0., ntot_start_norm));
1109 ntot_end_norm = max(0, min(1., ntot_end_norm));
1111 /* and calculate the overlap */
1112 overlap = ntot_end_norm - ntot_start_norm;
1114 if (overlap > 0.95) /* we allow for 5% slack */
1116 sc->r[j].use = TRUE;
1118 else if (overlap < 0.05)
1120 sc->r[j].use = FALSE;
1128 ntot_so_far += ntot_add;
1130 sample_coll_calc_ntot(sc);
1135 /* calculate minimum and maximum work values in sample collection */
1136 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1137 double *Wmin, double *Wmax)
1144 for(i=0;i<sc->nsamples;i++)
1146 samples_t *s=sc->s[i];
1147 sample_range_t *r=&(sc->r[i]);
1152 for(j=r->start; j<r->end; j++)
1154 *Wmin = min(*Wmin,s->du[j]*Wfac);
1155 *Wmax = max(*Wmax,s->du[j]*Wfac);
1160 int hd=0; /* determine the histogram direction: */
1162 if ( (s->hist->nhist>1) && (Wfac<0) )
1168 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1170 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1171 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1172 /* look for the highest value bin with values */
1173 if (s->hist->bin[hd][j]>0)
1175 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1176 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1186 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1195 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1201 /* calculate the BAR average given a histogram
1203 if type== 0, calculate the best estimate for the average,
1204 if type==-1, calculate the minimum possible value given the histogram
1205 if type== 1, calculate the maximum possible value given the histogram */
1206 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1212 /* normalization factor multiplied with bin width and
1213 number of samples (we normalize through M): */
1215 int hd=0; /* determine the histogram direction: */
1218 if ( (hist->nhist>1) && (Wfac<0) )
1223 max=hist->nbin[hd]-1;
1226 max=hist->nbin[hd]; /* we also add whatever was out of range */
1231 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1232 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1234 sum += pxdx/(1. + exp(x + sbMmDG));
1240 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1241 double temp, double tol, int type)
1246 double Wfac1,Wfac2,Wmin,Wmax;
1247 double DG0,DG1,DG2,dDG1;
1249 double n1, n2; /* numbers of samples as doubles */
1254 /* count the numbers of samples */
1260 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1262 /* this is the case when the delta U were calculated directly
1263 (i.e. we're not scaling dhdl) */
1269 /* we're using dhdl, so delta_lambda needs to be a
1270 multiplication factor. */
1271 double delta_lambda=cb->native_lambda-ca->native_lambda;
1272 Wfac1 = beta*delta_lambda;
1273 Wfac2 = -beta*delta_lambda;
1278 /* We print the output both in kT and kJ/mol.
1279 * Here we determine DG in kT, so when beta < 1
1280 * the precision has to be increased.
1285 /* Calculate minimum and maximum work to give an initial estimate of
1286 * delta G as their average.
1289 double Wmin1, Wmin2, Wmax1, Wmax2;
1290 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1291 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1293 Wmin=min(Wmin1, Wmin2);
1294 Wmax=max(Wmax1, Wmax2);
1302 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1304 /* We approximate by bisection: given our initial estimates
1305 we keep checking whether the halfway point is greater or
1306 smaller than what we get out of the BAR averages.
1308 For the comparison we can use twice the tolerance. */
1309 while (DG2 - DG0 > 2*tol)
1311 DG1 = 0.5*(DG0 + DG2);
1313 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1316 /* calculate the BAR averages */
1319 for(i=0;i<ca->nsamples;i++)
1321 samples_t *s=ca->s[i];
1322 sample_range_t *r=&(ca->r[i]);
1327 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1331 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1336 for(i=0;i<cb->nsamples;i++)
1338 samples_t *s=cb->s[i];
1339 sample_range_t *r=&(cb->r[i]);
1344 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1348 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1364 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1368 return 0.5*(DG0 + DG2);
1371 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1372 double temp, double dg, double *sa, double *sb)
1378 double Wfac1, Wfac2;
1384 /* count the numbers of samples */
1388 /* to ensure the work values are the same as during the delta_G */
1389 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1391 /* this is the case when the delta U were calculated directly
1392 (i.e. we're not scaling dhdl) */
1398 /* we're using dhdl, so delta_lambda needs to be a
1399 multiplication factor. */
1400 double delta_lambda=cb->native_lambda-ca->native_lambda;
1401 Wfac1 = beta*delta_lambda;
1402 Wfac2 = -beta*delta_lambda;
1405 /* first calculate the average work in both directions */
1406 for(i=0;i<ca->nsamples;i++)
1408 samples_t *s=ca->s[i];
1409 sample_range_t *r=&(ca->r[i]);
1414 for(j=r->start;j<r->end;j++)
1415 W_ab += Wfac1*s->du[j];
1419 /* normalization factor multiplied with bin width and
1420 number of samples (we normalize through M): */
1423 int hd=0; /* histogram direction */
1424 if ( (s->hist->nhist>1) && (Wfac1<0) )
1430 for(j=0;j<s->hist->nbin[0];j++)
1432 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1433 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1441 for(i=0;i<cb->nsamples;i++)
1443 samples_t *s=cb->s[i];
1444 sample_range_t *r=&(cb->r[i]);
1449 for(j=r->start;j<r->end;j++)
1450 W_ba += Wfac1*s->du[j];
1454 /* normalization factor multiplied with bin width and
1455 number of samples (we normalize through M): */
1458 int hd=0; /* histogram direction */
1459 if ( (s->hist->nhist>1) && (Wfac2<0) )
1465 for(j=0;j<s->hist->nbin[0];j++)
1467 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1468 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1476 /* then calculate the relative entropies */
1481 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1482 double temp, double dg, double *stddev)
1486 double sigmafact=0.;
1488 double Wfac1, Wfac2;
1494 /* count the numbers of samples */
1498 /* to ensure the work values are the same as during the delta_G */
1499 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1501 /* this is the case when the delta U were calculated directly
1502 (i.e. we're not scaling dhdl) */
1508 /* we're using dhdl, so delta_lambda needs to be a
1509 multiplication factor. */
1510 double delta_lambda=cb->native_lambda-ca->native_lambda;
1511 Wfac1 = beta*delta_lambda;
1512 Wfac2 = -beta*delta_lambda;
1518 /* calculate average in both directions */
1519 for(i=0;i<ca->nsamples;i++)
1521 samples_t *s=ca->s[i];
1522 sample_range_t *r=&(ca->r[i]);
1527 for(j=r->start;j<r->end;j++)
1529 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1534 /* normalization factor multiplied with bin width and
1535 number of samples (we normalize through M): */
1538 int hd=0; /* histogram direction */
1539 if ( (s->hist->nhist>1) && (Wfac1<0) )
1545 for(j=0;j<s->hist->nbin[0];j++)
1547 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1548 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1550 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1555 for(i=0;i<cb->nsamples;i++)
1557 samples_t *s=cb->s[i];
1558 sample_range_t *r=&(cb->r[i]);
1563 for(j=r->start;j<r->end;j++)
1565 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1570 /* normalization factor multiplied with bin width and
1571 number of samples (we normalize through M): */
1574 int hd=0; /* histogram direction */
1575 if ( (s->hist->nhist>1) && (Wfac2<0) )
1581 for(j=0;j<s->hist->nbin[0];j++)
1583 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1584 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1586 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1592 sigmafact /= (n1 + n2);
1596 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1597 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1602 static void calc_bar(barres_t *br, double tol,
1603 int npee_min, int npee_max, gmx_bool *bEE,
1607 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1608 for calculated quantities */
1609 int nsample1, nsample2;
1610 double temp=br->a->temp;
1612 double dg_min, dg_max;
1613 gmx_bool have_hist=FALSE;
1615 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1617 br->dg_disc_err = 0.;
1618 br->dg_histrange_err = 0.;
1620 /* check if there are histograms */
1621 for(i=0;i<br->a->nsamples;i++)
1623 if (br->a->r[i].use && br->a->s[i]->hist)
1631 for(i=0;i<br->b->nsamples;i++)
1633 if (br->b->r[i].use && br->b->s[i]->hist)
1641 /* calculate histogram-specific errors */
1644 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1645 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1647 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1649 /* the histogram range error is the biggest of the differences
1650 between the best estimate and the extremes */
1651 br->dg_histrange_err = fabs(dg_max - dg_min);
1653 br->dg_disc_err = 0.;
1654 for(i=0;i<br->a->nsamples;i++)
1656 if (br->a->s[i]->hist)
1657 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1659 for(i=0;i<br->b->nsamples;i++)
1661 if (br->b->s[i]->hist)
1662 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1665 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1667 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1676 sample_coll_t ca, cb;
1678 /* initialize the samples */
1679 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1681 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1684 for(npee=npee_min; npee<=npee_max; npee++)
1693 double dstddev2 = 0;
1696 for(p=0; p<npee; p++)
1703 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1704 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1708 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1711 sample_coll_destroy(&ca);
1713 sample_coll_destroy(&cb);
1717 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1721 partsum[npee*(npee_max+1)+p] += dgp;
1723 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1728 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1731 dstddev2 += stddevc*stddevc;
1733 sample_coll_destroy(&ca);
1734 sample_coll_destroy(&cb);
1738 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1744 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1745 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1749 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1751 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1752 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1753 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1754 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1759 static double bar_err(int nbmin, int nbmax, const double *partsum)
1762 double svar,s,s2,dg;
1765 for(nb=nbmin; nb<=nbmax; nb++)
1771 dg = partsum[nb*(nbmax+1)+b];
1777 svar += (s2 - s*s)/(nb - 1);
1780 return sqrt(svar/(nbmax + 1 - nbmin));
1783 /* deduce lambda value from legend.
1785 bdhdl = if true, value may be a derivative.
1787 bdhdl = whether the legend was for a derivative.
1789 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1797 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1799 ptr = strrchr(legend,' ');
1801 if (strstr(legend,"dH"))
1814 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1827 printf("%s\n", legend);
1828 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1830 if (sscanf(ptr,"%lf",&lambda) != 1)
1832 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1838 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1845 /* plain text lambda string */
1846 ptr = strstr(subtitle,"lambda");
1849 /* xmgrace formatted lambda string */
1850 ptr = strstr(subtitle,"\\xl\\f{}");
1854 /* xmgr formatted lambda string */
1855 ptr = strstr(subtitle,"\\8l\\4");
1859 ptr = strstr(ptr,"=");
1863 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1869 static double filename2lambda(char *fn)
1872 char *ptr,*endptr,*digitptr;
1875 /* go to the end of the path string and search backward to find the last
1876 directory in the path which has to contain the value of lambda
1878 while (ptr[1] != '\0')
1882 /* searching backward to find the second directory separator */
1887 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1889 if (dirsep == 1) break;
1892 /* save the last position of a digit between the last two
1893 separators = in the last dirname */
1894 if (dirsep > 0 && isdigit(*ptr))
1902 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1903 " last directory in the path '%s' does not contain a number",fn);
1905 if (digitptr[-1] == '-')
1909 lambda = strtod(digitptr,&endptr);
1910 if (endptr == digitptr)
1912 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1918 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1921 char *subtitle,**legend,*ptr;
1923 gmx_bool native_lambda_read=FALSE;
1929 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1932 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1936 snew(ba->np,ba->nset);
1937 for(i=0;i<ba->nset;i++)
1942 if (subtitle != NULL)
1944 ptr = strstr(subtitle,"T =");
1948 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1952 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1962 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1967 /* Try to deduce lambda from the subtitle */
1970 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1972 native_lambda_read=TRUE;
1975 snew(ba->lambda,ba->nset-1);
1978 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1981 if (!native_lambda_read)
1983 /* Deduce lambda from the file name */
1984 ba->native_lambda = filename2lambda(fn);
1985 native_lambda_read=TRUE;
1987 ba->lambda[0] = ba->native_lambda;
1991 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1996 for(i=0; i<ba->nset-1; i++)
1998 gmx_bool is_dhdl=(i==0);
1999 /* Read lambda from the legend */
2000 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
2002 if (is_dhdl && !native_lambda_read)
2004 ba->native_lambda = ba->lambda[i];
2005 native_lambda_read=TRUE;
2010 if (!native_lambda_read)
2012 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2015 /* Reorder the data */
2016 for(i=1; i<ba->nset; i++)
2018 ba->y[i-1] = ba->y[i];
2022 for(i=0; i<ba->nset-1; i++)
2031 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2040 read_bar_xvg_lowlevel(fn, temp, barsim);
2042 if (barsim->nset <1 )
2044 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2047 if ( ( *temp != barsim->temp) && (*temp > 0) )
2049 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2053 /* now create a series of samples_t */
2054 snew(s, barsim->nset);
2055 for(i=0;i<barsim->nset;i++)
2057 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2058 barsim->temp, lambda_same(barsim->native_lambda,
2061 s[i].du=barsim->y[i];
2062 s[i].ndu=barsim->np[i];
2065 lambda_list_insert_sample(lambda_head, s+i);
2067 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2068 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2069 for(i=0;i<barsim->nset;i++)
2071 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2076 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2077 double start_time, double delta_time,
2078 double native_lambda, double temp,
2079 double *last_t, const char *filename)
2083 double foreign_lambda;
2085 samples_t *s; /* convenience pointer */
2088 /* check the block types etc. */
2089 if ( (blk->nsub < 3) ||
2090 (blk->sub[0].type != xdr_datatype_int) ||
2091 (blk->sub[1].type != xdr_datatype_double) ||
2093 (blk->sub[2].type != xdr_datatype_float) &&
2094 (blk->sub[2].type != xdr_datatype_double)
2096 (blk->sub[0].nr < 1) ||
2097 (blk->sub[1].nr < 1) )
2100 "Unexpected/corrupted block data in file %s around time %g.",
2101 filename, start_time);
2104 derivative = blk->sub[0].ival[0];
2105 foreign_lambda = blk->sub[1].dval[0];
2109 /* initialize the samples structure if it's empty. */
2111 samples_init(*smp, native_lambda, foreign_lambda, temp,
2112 derivative!=0, filename);
2113 (*smp)->start_time=start_time;
2114 (*smp)->delta_time=delta_time;
2117 /* set convenience pointer */
2120 /* now double check */
2121 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2122 ( (derivative!=0) != (s->derivative!=0) ) )
2124 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2125 foreign_lambda, s->foreign_lambda);
2126 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2127 derivative, s->derivative);
2128 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2129 filename, start_time);
2132 /* make room for the data */
2133 if (s->ndu_alloc < (s->ndu + blk->sub[2].nr) )
2135 s->ndu_alloc += (s->ndu_alloc < blk->sub[2].nr) ?
2136 blk->sub[2].nr*2 : s->ndu_alloc;
2137 srenew(s->du_alloc, s->ndu_alloc);
2141 s->ndu += blk->sub[2].nr;
2142 s->ntot += blk->sub[2].nr;
2143 *ndu = blk->sub[2].nr;
2145 /* and copy the data*/
2146 for(j=0;j<blk->sub[2].nr;j++)
2148 if (blk->sub[2].type == xdr_datatype_float)
2150 s->du[startj+j] = blk->sub[2].fval[j];
2154 s->du[startj+j] = blk->sub[2].dval[j];
2157 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2159 *last_t = start_time + blk->sub[2].nr*delta_time;
2163 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2164 double start_time, double delta_time,
2165 double native_lambda, double temp,
2166 double *last_t, const char *filename)
2171 double foreign_lambda;
2175 /* check the block types etc. */
2176 if ( (blk->nsub < 2) ||
2177 (blk->sub[0].type != xdr_datatype_double) ||
2178 (blk->sub[1].type != xdr_datatype_large_int) ||
2179 (blk->sub[0].nr < 2) ||
2180 (blk->sub[1].nr < 2) )
2183 "Unexpected/corrupted block data in file %s around time %g",
2184 filename, start_time);
2195 "Unexpected/corrupted block data in file %s around time %g",
2196 filename, start_time);
2202 foreign_lambda=blk->sub[0].dval[0];
2203 derivative=(int)(blk->sub[1].lval[1]);
2205 foreign_lambda=native_lambda;
2207 samples_init(s, native_lambda, foreign_lambda, temp,
2208 derivative!=0, filename);
2211 for(i=0;i<nhist;i++)
2213 nbins[i] = blk->sub[i+2].nr;
2216 hist_init(s->hist, nhist, nbins);
2218 for(i=0;i<nhist;i++)
2220 s->hist->x0[i]=blk->sub[1].lval[2+i];
2221 s->hist->dx[i] = blk->sub[0].dval[1];
2223 s->hist->dx[i] = - s->hist->dx[i];
2226 s->hist->start_time = start_time;
2227 s->hist->delta_time = delta_time;
2228 s->start_time = start_time;
2229 s->delta_time = delta_time;
2231 for(i=0;i<nhist;i++)
2234 gmx_large_int_t sum=0;
2236 for(j=0;j<s->hist->nbin[i];j++)
2238 int binv=(int)(blk->sub[i+2].ival[j]);
2240 s->hist->bin[i][j] = binv;
2253 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2259 if (start_time + s->hist->sum*delta_time > *last_t)
2261 *last_t = start_time + s->hist->sum*delta_time;
2267 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2273 gmx_enxnm_t *enm=NULL;
2276 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2277 int *nhists=NULL; /* array to keep count & print at end */
2278 int *npts=NULL; /* array to keep count & print at end */
2279 double *lambdas=NULL; /* array to keep count & print at end */
2280 double native_lambda=-1;
2281 double end_time; /* the end time of the last batch of samples */
2284 fp = open_enx(fn,"r");
2285 do_enxnms(fp,&nre,&enm);
2288 while(do_enx(fp, fr))
2290 /* count the data blocks */
2295 /* DHCOLL block information: */
2296 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2299 /* count the blocks and handle collection information: */
2300 for(i=0;i<fr->nblock;i++)
2302 if (fr->block[i].id == enxDHHIST )
2304 if ( fr->block[i].id == enxDH )
2306 if (fr->block[i].id == enxDHCOLL )
2309 if ( (fr->block[i].nsub < 1) ||
2310 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2311 (fr->block[i].sub[0].nr < 5))
2313 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2316 /* read the data from the DHCOLL block */
2317 rtemp = fr->block[i].sub[0].dval[0];
2318 start_time = fr->block[i].sub[0].dval[1];
2319 delta_time = fr->block[i].sub[0].dval[2];
2320 start_lambda = fr->block[i].sub[0].dval[3];
2321 delta_lambda = fr->block[i].sub[0].dval[4];
2325 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2327 if ( ( *temp != rtemp) && (*temp > 0) )
2329 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2340 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2342 if (nblocks_raw > 0 && nblocks_hist > 0 )
2344 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2349 /* check the native lambda */
2350 if (!lambda_same(start_lambda, native_lambda) )
2352 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2353 fn, native_lambda, start_lambda, start_time);
2355 /* check the number of samples against the previous number */
2356 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2358 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2359 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2361 /* check whether last iterations's end time matches with
2362 the currrent start time */
2363 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2365 /* it didn't. We need to store our samples and reallocate */
2366 for(i=0;i<nsamples;i++)
2368 if (samples_rawdh[i])
2370 /* insert it into the existing list */
2371 lambda_list_insert_sample(lambda_head,
2373 /* and make sure we'll allocate a new one this time
2375 samples_rawdh[i]=NULL;
2382 /* this is the first round; allocate the associated data
2384 native_lambda=start_lambda;
2385 nsamples=nblocks_raw+nblocks_hist;
2386 snew(nhists, nsamples);
2387 snew(npts, nsamples);
2388 snew(lambdas, nsamples);
2389 snew(samples_rawdh, nsamples);
2390 for(i=0;i<nsamples;i++)
2395 samples_rawdh[i]=NULL; /* init to NULL so we know which
2396 ones contain values */
2401 k=0; /* counter for the lambdas, etc. arrays */
2402 for(i=0;i<fr->nblock;i++)
2404 if (fr->block[i].id == enxDH)
2407 read_edr_rawdh_block(&(samples_rawdh[k]),
2410 start_time, delta_time,
2411 start_lambda, rtemp,
2414 if (samples_rawdh[k])
2416 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2420 else if (fr->block[i].id == enxDHHIST)
2424 samples_t *s; /* this is where the data will go */
2425 s=read_edr_hist_block(&nb, &(fr->block[i]),
2426 start_time, delta_time,
2427 start_lambda, rtemp,
2432 lambdas[k]= s->foreign_lambda;
2435 /* and insert the new sample immediately */
2438 lambda_list_insert_sample(lambda_head, s+j);
2443 /* Now store all our extant sample collections */
2444 for(i=0;i<nsamples;i++)
2446 if (samples_rawdh[i])
2448 /* insert it into the existing list */
2449 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2454 fprintf(stderr, "\n");
2455 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2456 fn, first_t, last_t, native_lambda);
2457 for(i=0;i<nsamples;i++)
2461 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2465 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2475 int gmx_bar(int argc,char *argv[])
2477 static const char *desc[] = {
2478 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2479 "Bennett's acceptance ratio method (BAR). It also automatically",
2480 "adds series of individual free energies obtained with BAR into",
2481 "a combined free energy estimate.[PAR]",
2483 "Every individual BAR free energy difference relies on two ",
2484 "simulations at different states: say state A and state B, as",
2485 "controlled by a parameter, lambda (see the mdp parameter",
2486 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2487 "average of the Hamiltonian difference of state B given state A and",
2488 "vice versa. If the Hamiltonian does not linearly depend on lambda",
2489 "(in which case we can extrapolate the derivative of the Hamiltonian",
2490 "with respect to lambda, as is the default when [TT]free_energy[tt] is on),",
2491 "the energy differences to the other state need to be calculated",
2492 "explicitly during the simulation. This can be controlled with",
2493 "the mdp option [TT]foreign_lambda[tt].[PAR]",
2495 "Input option [TT]-f[tt] expects multiple dhdl files. ",
2496 "Two types of input files are supported:[BR]",
2497 "[TT]*[tt] Files with only one y-value, for such files it is assumed ",
2498 " that the y-value is dH/dlambda and that the Hamiltonian depends ",
2499 " linearly on lambda. The lambda value of the simulation is inferred ",
2500 " from the subtitle if present, otherwise from a number in the",
2501 " subdirectory in the file name.",
2503 "[TT]*[tt] Files with more than one y-value. The files should have columns ",
2504 " with dH/dlambda and Delta lambda. The lambda values are inferred ",
2505 " from the legends: lambda of the simulation from the legend of dH/dlambda ",
2506 " and the foreign lambda's from the legends of Delta H.[PAR]",
2507 "The lambda of the simulation is parsed from dhdl.xvg file's legend ",
2508 "containing the string 'dH', the foreign lambdas from the legend ",
2509 "containing the capitalized letters 'D' and 'H'. The temperature ",
2510 "is parsed from the legend line containing 'T ='.[PAR]",
2512 "The input option [TT]-g[tt] expects multiple .edr files. ",
2513 "These can contain either lists of energy differences (see the",
2514 "mdp option separate_dhdl_file), or a series of histograms",
2515 "(see the mdp options [TT]dh_hist_size[tt] and [TT]dh_hist_spacing[tt]).",
2516 "The temperature and lambda values are automatically deduced from",
2517 "the ener.edr file.[PAR]"
2519 "The free energy estimates are determined using BAR with bisection, ",
2520 "the precision of the output is set with [TT]-prec[tt]. ",
2521 "An error estimate taking into account time correlations ",
2522 "is made by splitting the data into blocks and determining ",
2523 "the free energy differences over those blocks and assuming ",
2524 "the blocks are independent. ",
2525 "The final error estimate is determined from the average variance ",
2526 "over 5 blocks. A range of blocks numbers for error estimation can ",
2527 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2529 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and 'foreign'",
2530 "lambda values, but always assumes independent samples: note that",
2531 "when aggregating energy differences/derivatives with different",
2532 "sampling intervals, this is almost certainly not correct: usually",
2533 "subsequent energies are correlated and different time intervals mean",
2534 "different degrees of correlation between samples.[PAR]",
2536 "The results are split in two parts: the last part contains the final ",
2537 "results in kJ/mol, together with the error estimate for each part ",
2538 "and the total. The first part contains detailed free energy ",
2539 "difference estimates and phase space overlap measures in units of ",
2540 "kT (together with their computed error estimate). The printed ",
2542 "[TT]*[tt] lam_A: the lambda values for point A.[BR]",
2543 "[TT]*[tt] lam_B: the lambda values for point B.[BR]",
2544 "[TT]*[tt] DG: the free energy estimate.[BR]",
2545 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2546 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2547 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2549 "The relative entropy of both states in each other's ensemble can be ",
2550 "interpreted as a measure of phase space overlap: ",
2551 "the relative entropy s_A of the work samples of lambda_B in the ",
2552 "ensemble of lambda_A (and vice versa for s_B), is a ",
2553 "measure of the 'distance' between Boltzmann distributions of ",
2554 "the two states, that goes to zero for identical distributions. See ",
2555 "Wu & Kofke, J. Chem. Phys. 123 084109 (2009) for more information.",
2557 "The estimate of the expected per-sample standard deviation, as given ",
2558 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2559 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2560 "of the actual statistical error, because it assumes independent samples).[PAR]",
2562 "To get a visual estimate of the phase space overlap, use the ",
2563 "[TT]-oh[tt] option to write series of histograms, together with the ",
2564 "[TT]-nbin[tt] option.[PAR]"
2566 static real begin=0,end=-1,temp=-1;
2567 int nd=2,nbmin=5,nbmax=5;
2569 gmx_bool calc_s,calc_v;
2571 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2572 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2573 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2574 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2575 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2576 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2577 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"}
2581 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2582 { efEDR, "-g", "ener", ffOPTRDMULT },
2583 { efXVG, "-o", "bar", ffOPTWR },
2584 { efXVG, "-oi", "barint", ffOPTWR },
2585 { efXVG, "-oh", "histogram", ffOPTWR }
2587 #define NFILE asize(fnm)
2590 int nf=0; /* file counter */
2592 int nfile_tot; /* total number of input files */
2597 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2598 lambda_t lambda_head; /* the head element */
2599 barres_t *results; /* the results */
2600 int nresults; /* number of results in results array */
2603 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2606 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2607 char ktformat[STRLEN], sktformat[STRLEN];
2608 char kteformat[STRLEN], skteformat[STRLEN];
2611 gmx_bool result_OK=TRUE,bEE=TRUE;
2613 gmx_bool disc_err=FALSE;
2614 double sum_disc_err=0.; /* discretization error */
2615 gmx_bool histrange_err=FALSE;
2616 double sum_histrange_err=0.; /* histogram range error */
2617 double stat_err=0.; /* statistical error */
2619 CopyRight(stderr,argv[0]);
2620 parse_common_args(&argc,argv,
2622 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2624 if (opt2bSet("-f", NFILE, fnm))
2626 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2628 if (opt2bSet("-g", NFILE, fnm))
2630 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2633 /* make linked list */
2635 lambda_init(lb, 0, 0);
2640 nfile_tot = nxvgfile + nedrfile;
2644 gmx_fatal(FARGS,"No input files!");
2649 gmx_fatal(FARGS,"Can not have negative number of digits");
2653 snew(partsum,(nbmax+1)*(nbmax+1));
2656 /* read in all files. First xvg files */
2657 for(f=0; f<nxvgfile; f++)
2659 read_bar_xvg(fxvgnms[f],&temp,lb);
2662 /* then .edr files */
2663 for(f=0; f<nedrfile; f++)
2665 read_barsim_edr(fedrnms[f],&temp,lb);;
2669 /* fix the times to allow for equilibration */
2670 lambdas_impose_times(lb, begin, end);
2672 if (opt2bSet("-oh",NFILE,fnm))
2674 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2677 /* assemble the output structures from the lambdas */
2678 results=barres_list_create(lb, &nresults);
2680 sum_disc_err=barres_list_max_disc_err(results, nresults);
2684 printf("\nNo results to calculate.\n");
2689 if (sum_disc_err > prec)
2692 nd = ceil(-log10(prec));
2693 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2697 sprintf(lamformat,"%%6.3f");
2698 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2699 /* the format strings of the results in kT */
2700 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2701 sprintf( sktformat,"%%%ds",6+nd);
2702 /* the format strings of the errors in kT */
2703 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2704 sprintf( skteformat,"%%%ds",4+nd);
2705 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2706 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2711 if (opt2bSet("-o",NFILE,fnm))
2713 sprintf(buf,"%s (%s)","\\DeltaG",unit_energy);
2714 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2715 "\\lambda",buf,exvggtXYDY,oenv);
2719 if (opt2bSet("-oi",NFILE,fnm))
2721 sprintf(buf,"%s (%s)","\\DeltaG",unit_energy);
2722 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2723 "\\lambda",buf,oenv);
2731 /* first calculate results */
2734 for(f=0; f<nresults; f++)
2736 /* Determine the free energy difference with a factor of 10
2737 * more accuracy than requested for printing.
2739 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2742 if (results[f].dg_disc_err > prec/10.)
2744 if (results[f].dg_histrange_err > prec/10.)
2748 /* print results in kT */
2752 printf("\nTemperature: %g K\n", temp);
2754 printf("\nDetailed results in kT (see help for explanation):\n\n");
2755 printf("%6s ", " lam_A");
2756 printf("%6s ", " lam_B");
2757 printf(sktformat, "DG ");
2759 printf(skteformat, "+/- ");
2761 printf(skteformat, "disc ");
2763 printf(skteformat, "range ");
2764 printf(sktformat, "s_A ");
2766 printf(skteformat, "+/- " );
2767 printf(sktformat, "s_B ");
2769 printf(skteformat, "+/- " );
2770 printf(sktformat, "stdev ");
2772 printf(skteformat, "+/- ");
2774 for(f=0; f<nresults; f++)
2776 printf(lamformat, results[f].a->native_lambda);
2778 printf(lamformat, results[f].b->native_lambda);
2780 printf(ktformat, results[f].dg);
2784 printf(kteformat, results[f].dg_err);
2789 printf(kteformat, results[f].dg_disc_err);
2794 printf(kteformat, results[f].dg_histrange_err);
2797 printf(ktformat, results[f].sa);
2801 printf(kteformat, results[f].sa_err);
2804 printf(ktformat, results[f].sb);
2808 printf(kteformat, results[f].sb_err);
2811 printf(ktformat, results[f].dg_stddev);
2815 printf(kteformat, results[f].dg_stddev_err);
2819 /* Check for negative relative entropy with a 95% certainty. */
2820 if (results[f].sa < -2*results[f].sa_err ||
2821 results[f].sb < -2*results[f].sb_err)
2829 printf("\nWARNING: Some of these results violate the Second Law of "
2830 "Thermodynamics: \n"
2831 " This is can be the result of severe undersampling, or "
2833 " there is something wrong with the simulations.\n");
2837 /* final results in kJ/mol */
2838 printf("\n\nFinal results in kJ/mol:\n\n");
2840 for(f=0; f<nresults; f++)
2845 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2851 fprintf(fpb, xvg3format,
2852 0.5*(results[f].a->native_lambda +
2853 results[f].b->native_lambda),
2854 results[f].dg,results[f].dg_err);
2857 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2858 results[f].lambda_b);*/
2860 printf(lamformat, results[f].a->native_lambda);
2862 printf(lamformat, results[f].b->native_lambda);
2865 printf(dgformat,results[f].dg*kT);
2869 printf(dgformat,results[f].dg_err*kT);
2873 printf(" (max. range err. = ");
2874 printf(dgformat,results[f].dg_histrange_err*kT);
2876 sum_histrange_err += results[f].dg_histrange_err*kT;
2880 dg_tot += results[f].dg;
2884 printf(lamformat, results[0].a->native_lambda);
2886 printf(lamformat, results[nresults-1].b->native_lambda);
2889 printf(dgformat,dg_tot*kT);
2892 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2894 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2899 printf("\nmaximum discretization error = ");
2900 printf(dgformat,sum_disc_err);
2901 if (bEE && stat_err < sum_disc_err)
2903 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2908 printf("\nmaximum histogram range error = ");
2909 printf(dgformat,sum_histrange_err);
2910 if (bEE && stat_err < sum_histrange_err)
2912 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2921 fprintf(fpi, xvg2format,
2922 results[nresults-1].b->native_lambda, dg_tot);
2926 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2927 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");