1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Green Red Orange Magenta Azure Cyan Skyblue
39 #include "gmx_header_config.h"
55 #include "gmx_fatal.h"
60 /* Suppress Cygwin compiler warnings from using newlib version of
66 /* the dhdl.xvg data from a simulation (actually obsolete, but still
67 here for reading the dhdl.xvg file*/
71 int ftp; /* file type */
72 int nset; /* number of lambdas, including dhdl */
73 int *np; /* number of data points (du or hists) per lambda */
74 int np_alloc; /* number of points (du or hists) allocated */
75 double temp; /* temperature */
76 double *lambda; /* the lambdas (of first index for y). */
77 double *t; /* the times (of second index for y) */
78 double **y; /* the dU values. y[0] holds the derivative, while
79 further ones contain the energy differences between
80 the native lambda and the 'foreign' lambdas. */
82 double native_lambda; /* the native lambda */
84 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
91 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
92 double dx[2]; /* the histogram spacing. The reverse
93 dx is the negative of the forward dx.*/
94 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
97 int nbin[2]; /* the (forward+reverse) number of bins */
98 gmx_large_int_t sum; /* the total number of counts. Must be
99 the same for forward + reverse. */
100 int nhist; /* number of hist datas (forward or reverse) */
102 double start_time, delta_time; /* start time, end time of histogram */
106 /* an aggregate of samples for partial free energy calculation */
107 typedef struct samples_t
109 double native_lambda;
110 double foreign_lambda;
111 double temp; /* the temperature */
112 gmx_bool derivative; /* whether this sample is a derivative */
114 /* The samples come either as either delta U lists: */
115 int ndu; /* the number of delta U samples */
116 double *du; /* the delta u's */
117 double *t; /* the times associated with those samples, or: */
118 double start_time,delta_time;/*start time and delta time for linear time*/
120 /* or as histograms: */
121 hist_t *hist; /* a histogram */
123 /* allocation data: (not NULL for data 'owned' by this struct) */
124 double *du_alloc, *t_alloc; /* allocated delta u arrays */
125 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
126 hist_t *hist_alloc; /* allocated hist */
128 gmx_large_int_t ntot; /* total number of samples */
129 const char *filename; /* the file name this sample comes from */
132 /* a sample range (start to end for du-style data, or boolean
133 for both du-style data and histograms */
134 typedef struct sample_range_t
136 int start, end; /* start and end index for du style data */
137 gmx_bool use; /* whether to use this sample */
139 samples_t *s; /* the samples this range belongs to */
143 /* a collection of samples for a partial free energy calculation
144 (i.e. the collection of samples from one native lambda to one
146 typedef struct sample_coll_t
148 double native_lambda; /* these should be the same for all samples in the histogram?*/
149 double foreign_lambda; /* collection */
150 double temp; /* the temperature */
152 int nsamples; /* the number of samples */
153 samples_t **s; /* the samples themselves */
154 sample_range_t *r; /* the sample ranges */
155 int nsamples_alloc; /* number of allocated samples */
157 gmx_large_int_t ntot; /* total number of samples in the ranges of
160 struct sample_coll_t *next, *prev; /* next and previous in the list */
163 /* all the samples associated with a lambda point */
164 typedef struct lambda_t
166 double lambda; /* the native lambda (at start time if dynamic) */
167 double temp; /* temperature */
169 sample_coll_t *sc; /* the samples */
171 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
173 struct lambda_t *next, *prev; /* the next and prev in the list */
177 /* calculated values. */
179 sample_coll_t *a, *b; /* the simulation data */
181 double dg; /* the free energy difference */
182 double dg_err; /* the free energy difference */
184 double dg_disc_err; /* discretization error */
185 double dg_histrange_err; /* histogram range error */
187 double sa; /* relative entropy of b in state a */
188 double sa_err; /* error in sa */
189 double sb; /* relative entropy of a in state b */
190 double sb_err; /* error in sb */
192 double dg_stddev; /* expected dg stddev per sample */
193 double dg_stddev_err; /* error in dg_stddev */
199 static void hist_init(hist_t *h, int nhist, int *nbin)
204 gmx_fatal(FARGS, "histogram with more than two sets of data!");
208 snew(h->bin[i], nbin[i]);
211 h->start_time=h->delta_time=0;
218 static void hist_destroy(hist_t *h)
224 static void xvg_init(xvg_t *ba)
233 static void samples_init(samples_t *s, double native_lambda,
234 double foreign_lambda, double temp,
235 gmx_bool derivative, const char *filename)
237 s->native_lambda=native_lambda;
238 s->foreign_lambda=foreign_lambda;
240 s->derivative=derivative;
245 s->start_time = s->delta_time = 0;
254 s->filename=filename;
257 static void sample_range_init(sample_range_t *r, samples_t *s)
265 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
266 double foreign_lambda, double temp)
268 sc->native_lambda = native_lambda;
269 sc->foreign_lambda = foreign_lambda;
275 sc->nsamples_alloc=0;
278 sc->next=sc->prev=NULL;
281 static void sample_coll_destroy(sample_coll_t *sc)
283 /* don't free the samples themselves */
289 static void lambda_init(lambda_t *l, double native_lambda, double temp)
291 l->lambda=native_lambda;
299 sample_coll_init(l->sc, native_lambda, 0., 0.);
304 static void barres_init(barres_t *br)
322 static gmx_bool lambda_same(double lambda1, double lambda2)
324 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
327 /* calculate the total number of samples in a sample collection */
328 static void sample_coll_calc_ntot(sample_coll_t *sc)
333 for(i=0;i<sc->nsamples;i++)
339 sc->ntot += sc->s[i]->ntot;
343 sc->ntot += sc->r[i].end - sc->r[i].start;
350 /* find the barsamples_t associated with a lambda that corresponds to
351 a specific foreign lambda */
352 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
353 double foreign_lambda)
355 sample_coll_t *sc=l->sc->next;
359 if (lambda_same(sc->foreign_lambda,foreign_lambda))
369 /* insert li into an ordered list of lambda_colls */
370 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
372 sample_coll_t *scn=l->sc->next;
373 while ( (scn!=l->sc) )
375 if (scn->foreign_lambda > sc->foreign_lambda)
379 /* now insert it before the found scn */
386 /* insert li into an ordered list of lambdas */
387 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
389 lambda_t *lc=head->next;
392 if (lc->lambda > li->lambda)
396 /* now insert ourselves before the found lc */
403 /* insert a sample and a sample_range into a sample_coll. The
404 samples are stored as a pointer, the range is copied. */
405 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
408 /* first check if it belongs here */
409 if (sc->temp != s->temp)
411 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
412 s->filename, sc->next->s[0]->filename);
414 if (sc->native_lambda != s->native_lambda)
416 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
417 s->filename, sc->next->s[0]->filename);
419 if (sc->foreign_lambda != s->foreign_lambda)
421 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
422 s->filename, sc->next->s[0]->filename);
425 /* check if there's room */
426 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
428 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
429 srenew(sc->s, sc->nsamples_alloc);
430 srenew(sc->r, sc->nsamples_alloc);
432 sc->s[sc->nsamples]=s;
433 sc->r[sc->nsamples]=*r;
436 sample_coll_calc_ntot(sc);
439 /* insert a sample into a lambda_list, creating the right sample_coll if
441 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
443 gmx_bool found=FALSE;
447 lambda_t *l=head->next;
449 /* first search for the right lambda_t */
452 if (lambda_same(l->lambda, s->native_lambda) )
462 snew(l, 1); /* allocate a new one */
463 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
464 lambda_insert_lambda(head, l); /* add it to the list */
467 /* now look for a sample collection */
468 sc=lambda_find_sample_coll(l, s->foreign_lambda);
471 snew(sc, 1); /* allocate a new one */
472 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
473 lambda_insert_sample_coll(l, sc);
476 /* now insert the samples into the sample coll */
477 sample_range_init(&r, s);
478 sample_coll_insert_sample(sc, s, &r);
482 /* make a histogram out of a sample collection */
483 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
484 int *nbin_alloc, int *nbin,
485 double *dx, double *xmin, int nbin_default)
488 gmx_bool dx_set=FALSE;
489 gmx_bool xmin_set=FALSE;
491 gmx_bool xmax_set=FALSE;
492 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
493 limits of a histogram */
496 /* first determine dx and xmin; try the histograms */
497 for(i=0;i<sc->nsamples;i++)
501 hist_t *hist=sc->s[i]->hist;
502 for(k=0;k<hist->nhist;k++)
504 double hdx=hist->dx[k];
505 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
507 /* we use the biggest dx*/
508 if ( (!dx_set) || hist->dx[0] > *dx)
513 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
516 *xmin = (hist->x0[k]*hdx);
519 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
523 if (hist->bin[k][hist->nbin[k]-1] != 0)
526 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
534 /* and the delta us */
535 for(i=0;i<sc->nsamples;i++)
537 if (sc->s[i]->ndu > 0)
539 /* determine min and max */
540 int starti=sc->r[i].start;
541 int endi=sc->r[i].end;
542 double du_xmin=sc->s[i]->du[starti];
543 double du_xmax=sc->s[i]->du[starti];
544 for(j=starti+1;j<endi;j++)
546 if (sc->s[i]->du[j] < du_xmin)
547 du_xmin = sc->s[i]->du[j];
548 if (sc->s[i]->du[j] > du_xmax)
549 du_xmax = sc->s[i]->du[j];
552 /* and now change the limits */
553 if ( (!xmin_set) || (du_xmin < *xmin) )
558 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
566 if (!xmax_set || !xmin_set)
576 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
577 be 0, and we count from 0 */
581 *nbin=(xmax-(*xmin))/(*dx);
584 if (*nbin > *nbin_alloc)
587 srenew(*bin, *nbin_alloc);
590 /* reset the histogram */
591 for(i=0;i<(*nbin);i++)
596 /* now add the actual data */
597 for(i=0;i<sc->nsamples;i++)
601 hist_t *hist=sc->s[i]->hist;
602 for(k=0;k<hist->nhist;k++)
604 double hdx = hist->dx[k];
605 double xmin_hist=hist->x0[k]*hdx;
606 for(j=0;j<hist->nbin[k];j++)
608 /* calculate the bin corresponding to the middle of the
610 double x=hdx*(j+0.5) + xmin_hist;
611 int binnr=(int)((x-(*xmin))/(*dx));
613 if (binnr >= *nbin || binnr<0)
616 (*bin)[binnr] += hist->bin[k][j];
622 int starti=sc->r[i].start;
623 int endi=sc->r[i].end;
624 for(j=starti;j<endi;j++)
626 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
627 if (binnr >= *nbin || binnr<0)
636 /* write a collection of histograms to a file */
637 void lambdas_histogram(lambda_t *bl_head, const char *filename,
638 int nbin_default, const output_env_t oenv)
640 char label_x[STRLEN];
641 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
642 const char *title="N(\\DeltaH)";
643 const char *label_y="Samples";
647 char **setnames=NULL;
648 gmx_bool first_set=FALSE;
649 /* histogram data: */
657 printf("\nWriting histogram to %s\n", filename);
658 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
660 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
662 /* first get all the set names */
664 /* iterate over all lambdas */
667 sample_coll_t *sc=bl->sc->next;
669 /* iterate over all samples */
673 srenew(setnames, nsets);
674 snew(setnames[nsets-1], STRLEN);
675 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
677 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
678 deltag, lambda, sc->foreign_lambda, lambda,
683 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
684 dhdl, lambda, sc->native_lambda);
691 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
694 /* now make the histograms */
696 /* iterate over all lambdas */
699 sample_coll_t *sc=bl->sc->next;
701 /* iterate over all samples */
706 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
709 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
714 double xmin=i*dx + min;
715 double xmax=(i+1)*dx + min;
717 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
733 /* create a collection (array) of barres_t object given a ordered linked list
734 of barlamda_t sample collections */
735 static barres_t *barres_list_create(lambda_t *bl_head, int *nres,
745 /* first count the lambdas */
752 snew(res, nlambda-1);
754 /* next put the right samples in the res */
756 bl=bl_head->next->next; /* we start with the second one. */
759 sample_coll_t *sc, *scprev;
760 barres_t *br=&(res[*nres]);
761 /* there is always a previous one. we search for that as a foreign
763 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
764 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
772 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
773 sc=lambda_find_sample_coll(bl, bl->lambda);
777 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
782 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
785 else if (!scprev && !sc)
787 gmx_fatal(FARGS,"There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
793 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->lambda,bl->prev->lambda);
797 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->prev->lambda,bl->lambda);
809 /* estimate the maximum discretization error */
810 static double barres_list_max_disc_err(barres_t *res, int nres)
818 barres_t *br=&(res[i]);
820 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
822 for(j=0;j<br->a->nsamples;j++)
824 if (br->a->s[j]->hist)
827 if (br->a->s[j]->derivative)
830 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
833 for(j=0;j<br->b->nsamples;j++)
835 if (br->b->s[j]->hist)
838 if (br->b->s[j]->derivative)
840 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
848 /* impose start and end times on a sample collection, updating sample_ranges */
849 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
853 for(i=0;i<sc->nsamples;i++)
855 samples_t *s=sc->s[i];
856 sample_range_t *r=&(sc->r[i]);
859 double end_time=s->hist->delta_time*s->hist->sum +
861 if (s->hist->start_time < begin_t || end_time > end_t)
871 if (s->start_time < begin_t)
873 r->start=(int)((begin_t - s->start_time)/s->delta_time);
875 end_time=s->delta_time*s->ndu + s->start_time;
876 if (end_time > end_t)
878 r->end=(int)((end_t - s->start_time)/s->delta_time);
884 for(j=0;j<s->ndu;j++)
886 if (s->t[j] <begin_t)
891 if (s->t[j] >= end_t)
898 if (r->start > r->end)
904 sample_coll_calc_ntot(sc);
907 static void lambdas_impose_times(lambda_t *head, double begin, double end)
909 double first_t, last_t;
910 double begin_t, end_t;
914 if (begin<=0 && end<0)
919 /* first determine the global start and end times */
925 sample_coll_t *sc=lc->sc->next;
928 for(j=0;j<sc->nsamples;j++)
930 double start_t,end_t;
932 start_t = sc->s[j]->start_time;
933 end_t = sc->s[j]->start_time;
936 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
942 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
946 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
950 if (start_t < first_t || first_t<0)
964 /* calculate the actual times */
982 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
988 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
990 /* then impose them */
994 sample_coll_t *sc=lc->sc->next;
997 sample_coll_impose_times(sc, begin_t, end_t);
1005 /* create subsample i out of ni from an existing sample_coll */
1006 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1007 sample_coll_t *sc_orig,
1011 int hist_start, hist_end;
1013 gmx_large_int_t ntot_start;
1014 gmx_large_int_t ntot_end;
1015 gmx_large_int_t ntot_so_far;
1017 *sc = *sc_orig; /* just copy all fields */
1019 /* allocate proprietary memory */
1020 snew(sc->s, sc_orig->nsamples);
1021 snew(sc->r, sc_orig->nsamples);
1023 /* copy the samples */
1024 for(j=0;j<sc_orig->nsamples;j++)
1026 sc->s[j] = sc_orig->s[j];
1027 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1030 /* now fix start and end fields */
1031 /* the casts avoid possible overflows */
1032 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1033 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1035 for(j=0;j<sc->nsamples;j++)
1037 gmx_large_int_t ntot_add;
1038 gmx_large_int_t new_start, new_end;
1044 ntot_add = sc->s[j]->hist->sum;
1048 ntot_add = sc->r[j].end - sc->r[j].start;
1056 if (!sc->s[j]->hist)
1058 if (ntot_so_far < ntot_start)
1060 /* adjust starting point */
1061 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1065 new_start = sc->r[j].start;
1067 /* adjust end point */
1068 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1069 if (new_end > sc->r[j].end)
1071 new_end=sc->r[j].end;
1074 /* check if we're in range at all */
1075 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1080 /* and write the new range */
1081 sc->r[j].start=(int)new_start;
1082 sc->r[j].end=(int)new_end;
1089 double ntot_start_norm, ntot_end_norm;
1090 /* calculate the amount of overlap of the
1091 desired range (ntot_start -- ntot_end) onto
1092 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1094 /* first calculate normalized bounds
1095 (where 0 is the start of the hist range, and 1 the end) */
1096 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1097 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1099 /* now fix the boundaries */
1100 ntot_start_norm = min(1, max(0., ntot_start_norm));
1101 ntot_end_norm = max(0, min(1., ntot_end_norm));
1103 /* and calculate the overlap */
1104 overlap = ntot_end_norm - ntot_start_norm;
1106 if (overlap > 0.95) /* we allow for 5% slack */
1108 sc->r[j].use = TRUE;
1110 else if (overlap < 0.05)
1112 sc->r[j].use = FALSE;
1120 ntot_so_far += ntot_add;
1122 sample_coll_calc_ntot(sc);
1127 /* calculate minimum and maximum work values in sample collection */
1128 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1129 double *Wmin, double *Wmax)
1136 for(i=0;i<sc->nsamples;i++)
1138 samples_t *s=sc->s[i];
1139 sample_range_t *r=&(sc->r[i]);
1144 for(j=r->start; j<r->end; j++)
1146 *Wmin = min(*Wmin,s->du[j]*Wfac);
1147 *Wmax = max(*Wmax,s->du[j]*Wfac);
1152 int hd=0; /* determine the histogram direction: */
1154 if ( (s->hist->nhist>1) && (Wfac<0) )
1160 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1162 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1163 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1164 /* look for the highest value bin with values */
1165 if (s->hist->bin[hd][j]>0)
1167 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1168 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1178 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1187 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1193 /* calculate the BAR average given a histogram
1195 if type== 0, calculate the best estimate for the average,
1196 if type==-1, calculate the minimum possible value given the histogram
1197 if type== 1, calculate the maximum possible value given the histogram */
1198 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1204 /* normalization factor multiplied with bin width and
1205 number of samples (we normalize through M): */
1207 int hd=0; /* determine the histogram direction: */
1210 if ( (hist->nhist>1) && (Wfac<0) )
1215 max=hist->nbin[hd]-1;
1218 max=hist->nbin[hd]; /* we also add whatever was out of range */
1223 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1224 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1226 sum += pxdx/(1. + exp(x + sbMmDG));
1232 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1233 double temp, double tol, int type)
1238 double Wfac1,Wfac2,Wmin,Wmax;
1239 double DG0,DG1,DG2,dDG1;
1241 double n1, n2; /* numbers of samples as doubles */
1246 /* count the numbers of samples */
1252 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1254 /* this is the case when the delta U were calculated directly
1255 (i.e. we're not scaling dhdl) */
1261 /* we're using dhdl, so delta_lambda needs to be a
1262 multiplication factor. */
1263 double delta_lambda=cb->native_lambda-ca->native_lambda;
1264 Wfac1 = beta*delta_lambda;
1265 Wfac2 = -beta*delta_lambda;
1270 /* We print the output both in kT and kJ/mol.
1271 * Here we determine DG in kT, so when beta < 1
1272 * the precision has to be increased.
1277 /* Calculate minimum and maximum work to give an initial estimate of
1278 * delta G as their average.
1281 double Wmin1, Wmin2, Wmax1, Wmax2;
1282 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1283 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1285 Wmin=min(Wmin1, Wmin2);
1286 Wmax=max(Wmax1, Wmax2);
1294 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1296 /* We approximate by bisection: given our initial estimates
1297 we keep checking whether the halfway point is greater or
1298 smaller than what we get out of the BAR averages.
1300 For the comparison we can use twice the tolerance. */
1301 while (DG2 - DG0 > 2*tol)
1303 DG1 = 0.5*(DG0 + DG2);
1305 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1308 /* calculate the BAR averages */
1311 for(i=0;i<ca->nsamples;i++)
1313 samples_t *s=ca->s[i];
1314 sample_range_t *r=&(ca->r[i]);
1319 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1323 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1328 for(i=0;i<cb->nsamples;i++)
1330 samples_t *s=cb->s[i];
1331 sample_range_t *r=&(cb->r[i]);
1336 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1340 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1356 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1360 return 0.5*(DG0 + DG2);
1363 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1364 double temp, double dg, double *sa, double *sb)
1370 double Wfac1, Wfac2;
1376 /* count the numbers of samples */
1380 /* to ensure the work values are the same as during the delta_G */
1381 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1383 /* this is the case when the delta U were calculated directly
1384 (i.e. we're not scaling dhdl) */
1390 /* we're using dhdl, so delta_lambda needs to be a
1391 multiplication factor. */
1392 double delta_lambda=cb->native_lambda-ca->native_lambda;
1393 Wfac1 = beta*delta_lambda;
1394 Wfac2 = -beta*delta_lambda;
1397 /* first calculate the average work in both directions */
1398 for(i=0;i<ca->nsamples;i++)
1400 samples_t *s=ca->s[i];
1401 sample_range_t *r=&(ca->r[i]);
1406 for(j=r->start;j<r->end;j++)
1407 W_ab += Wfac1*s->du[j];
1411 /* normalization factor multiplied with bin width and
1412 number of samples (we normalize through M): */
1415 int hd=0; /* histogram direction */
1416 if ( (s->hist->nhist>1) && (Wfac1<0) )
1422 for(j=0;j<s->hist->nbin[0];j++)
1424 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1425 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1433 for(i=0;i<cb->nsamples;i++)
1435 samples_t *s=cb->s[i];
1436 sample_range_t *r=&(cb->r[i]);
1441 for(j=r->start;j<r->end;j++)
1442 W_ba += Wfac1*s->du[j];
1446 /* normalization factor multiplied with bin width and
1447 number of samples (we normalize through M): */
1450 int hd=0; /* histogram direction */
1451 if ( (s->hist->nhist>1) && (Wfac2<0) )
1457 for(j=0;j<s->hist->nbin[0];j++)
1459 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1460 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1468 /* then calculate the relative entropies */
1473 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1474 double temp, double dg, double *stddev)
1478 double sigmafact=0.;
1480 double Wfac1, Wfac2;
1486 /* count the numbers of samples */
1490 /* to ensure the work values are the same as during the delta_G */
1491 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1493 /* this is the case when the delta U were calculated directly
1494 (i.e. we're not scaling dhdl) */
1500 /* we're using dhdl, so delta_lambda needs to be a
1501 multiplication factor. */
1502 double delta_lambda=cb->native_lambda-ca->native_lambda;
1503 Wfac1 = beta*delta_lambda;
1504 Wfac2 = -beta*delta_lambda;
1510 /* calculate average in both directions */
1511 for(i=0;i<ca->nsamples;i++)
1513 samples_t *s=ca->s[i];
1514 sample_range_t *r=&(ca->r[i]);
1519 for(j=r->start;j<r->end;j++)
1521 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1526 /* normalization factor multiplied with bin width and
1527 number of samples (we normalize through M): */
1530 int hd=0; /* histogram direction */
1531 if ( (s->hist->nhist>1) && (Wfac1<0) )
1537 for(j=0;j<s->hist->nbin[0];j++)
1539 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1540 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1542 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1547 for(i=0;i<cb->nsamples;i++)
1549 samples_t *s=cb->s[i];
1550 sample_range_t *r=&(cb->r[i]);
1555 for(j=r->start;j<r->end;j++)
1557 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1562 /* normalization factor multiplied with bin width and
1563 number of samples (we normalize through M): */
1566 int hd=0; /* histogram direction */
1567 if ( (s->hist->nhist>1) && (Wfac2<0) )
1573 for(j=0;j<s->hist->nbin[0];j++)
1575 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1576 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1578 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1584 sigmafact /= (n1 + n2);
1588 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1589 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1594 static void calc_bar(barres_t *br, double tol,
1595 int npee_min, int npee_max, gmx_bool *bEE,
1599 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1600 for calculated quantities */
1601 int nsample1, nsample2;
1602 double temp=br->a->temp;
1604 double dg_min, dg_max;
1605 gmx_bool have_hist=FALSE;
1607 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1609 br->dg_disc_err = 0.;
1610 br->dg_histrange_err = 0.;
1612 /* check if there are histograms */
1613 for(i=0;i<br->a->nsamples;i++)
1615 if (br->a->r[i].use && br->a->s[i]->hist)
1623 for(i=0;i<br->b->nsamples;i++)
1625 if (br->b->r[i].use && br->b->s[i]->hist)
1633 /* calculate histogram-specific errors */
1636 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1637 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1639 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1641 /* the histogram range error is the biggest of the differences
1642 between the best estimate and the extremes */
1643 br->dg_histrange_err = fabs(dg_max - dg_min);
1645 br->dg_disc_err = 0.;
1646 for(i=0;i<br->a->nsamples;i++)
1648 if (br->a->s[i]->hist)
1649 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1651 for(i=0;i<br->b->nsamples;i++)
1653 if (br->b->s[i]->hist)
1654 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1657 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1659 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1668 sample_coll_t ca, cb;
1670 /* initialize the samples */
1671 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1673 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1676 for(npee=npee_min; npee<=npee_max; npee++)
1685 double dstddev2 = 0;
1688 for(p=0; p<npee; p++)
1695 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1696 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1700 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1703 sample_coll_destroy(&ca);
1705 sample_coll_destroy(&cb);
1709 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1713 partsum[npee*(npee_max+1)+p] += dgp;
1715 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1720 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1723 dstddev2 += stddevc*stddevc;
1725 sample_coll_destroy(&ca);
1726 sample_coll_destroy(&cb);
1730 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1736 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1737 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1741 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1743 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1744 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1745 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1746 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1751 static double bar_err(int nbmin, int nbmax, const double *partsum)
1754 double svar,s,s2,dg;
1757 for(nb=nbmin; nb<=nbmax; nb++)
1763 dg = partsum[nb*(nbmax+1)+b];
1769 svar += (s2 - s*s)/(nb - 1);
1772 return sqrt(svar/(nbmax + 1 - nbmin));
1775 /* deduce lambda value from legend.
1777 bdhdl = if true, value may be a derivative.
1779 bdhdl = whether the legend was for a derivative.
1781 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1789 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1791 ptr = strrchr(legend,' ');
1793 if (strstr(legend,"dH"))
1806 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1819 printf("%s\n", legend);
1820 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1822 if (sscanf(ptr,"%lf",&lambda) != 1)
1824 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1830 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1837 /* plain text lambda string */
1838 ptr = strstr(subtitle,"lambda");
1841 /* xmgrace formatted lambda string */
1842 ptr = strstr(subtitle,"\\xl\\f{}");
1846 /* xmgr formatted lambda string */
1847 ptr = strstr(subtitle,"\\8l\\4");
1851 ptr = strstr(ptr,"=");
1855 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1861 static double filename2lambda(char *fn)
1864 char *ptr,*endptr,*digitptr;
1867 /* go to the end of the path string and search backward to find the last
1868 directory in the path which has to contain the value of lambda
1870 while (ptr[1] != '\0')
1874 /* searching backward to find the second directory separator */
1879 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1881 if (dirsep == 1) break;
1884 /* save the last position of a digit between the last two
1885 separators = in the last dirname */
1886 if (dirsep > 0 && isdigit(*ptr))
1894 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1895 " last directory in the path '%s' does not contain a number",fn);
1897 if (digitptr[-1] == '-')
1901 lambda = strtod(digitptr,&endptr);
1902 if (endptr == digitptr)
1904 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1910 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1913 char *subtitle,**legend,*ptr;
1915 gmx_bool native_lambda_read=FALSE;
1921 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1924 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1928 snew(ba->np,ba->nset);
1929 for(i=0;i<ba->nset;i++)
1934 if (subtitle != NULL)
1936 ptr = strstr(subtitle,"T =");
1940 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1944 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1954 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1959 /* Try to deduce lambda from the subtitle */
1962 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1964 native_lambda_read=TRUE;
1967 snew(ba->lambda,ba->nset-1);
1970 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1973 if (!native_lambda_read)
1975 /* Deduce lambda from the file name */
1976 ba->native_lambda = filename2lambda(fn);
1977 native_lambda_read=TRUE;
1979 ba->lambda[0] = ba->native_lambda;
1983 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1988 for(i=0; i<ba->nset-1; i++)
1990 gmx_bool is_dhdl=(i==0);
1991 /* Read lambda from the legend */
1992 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
1994 if (is_dhdl && !native_lambda_read)
1996 ba->native_lambda = ba->lambda[i];
1997 native_lambda_read=TRUE;
2002 if (!native_lambda_read)
2004 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2007 /* Reorder the data */
2008 for(i=1; i<ba->nset; i++)
2010 ba->y[i-1] = ba->y[i];
2014 for(i=0; i<ba->nset-1; i++)
2023 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2032 read_bar_xvg_lowlevel(fn, temp, barsim);
2034 if (barsim->nset <1 )
2036 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2039 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2041 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2045 /* now create a series of samples_t */
2046 snew(s, barsim->nset);
2047 for(i=0;i<barsim->nset;i++)
2049 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2050 barsim->temp, lambda_same(barsim->native_lambda,
2053 s[i].du=barsim->y[i];
2054 s[i].ndu=barsim->np[i];
2057 lambda_list_insert_sample(lambda_head, s+i);
2059 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2060 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2061 for(i=0;i<barsim->nset;i++)
2063 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2068 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2069 double start_time, double delta_time,
2070 double native_lambda, double temp,
2071 double *last_t, const char *filename)
2075 double foreign_lambda;
2077 samples_t *s; /* convenience pointer */
2080 /* check the block types etc. */
2081 if ( (blk->nsub < 3) ||
2082 (blk->sub[0].type != xdr_datatype_int) ||
2083 (blk->sub[1].type != xdr_datatype_double) ||
2085 (blk->sub[2].type != xdr_datatype_float) &&
2086 (blk->sub[2].type != xdr_datatype_double)
2088 (blk->sub[0].nr < 1) ||
2089 (blk->sub[1].nr < 1) )
2092 "Unexpected/corrupted block data in file %s around time %g.",
2093 filename, start_time);
2096 derivative = blk->sub[0].ival[0];
2097 foreign_lambda = blk->sub[1].dval[0];
2101 /* initialize the samples structure if it's empty. */
2103 samples_init(*smp, native_lambda, foreign_lambda, temp,
2104 derivative!=0, filename);
2105 (*smp)->start_time=start_time;
2106 (*smp)->delta_time=delta_time;
2109 /* set convenience pointer */
2112 /* now double check */
2113 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2114 ( (derivative!=0) != (s->derivative!=0) ) )
2116 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2117 foreign_lambda, s->foreign_lambda);
2118 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2119 derivative, s->derivative);
2120 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2121 filename, start_time);
2124 /* make room for the data */
2125 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2127 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2128 blk->sub[2].nr*2 : s->ndu_alloc;
2129 srenew(s->du_alloc, s->ndu_alloc);
2133 s->ndu += blk->sub[2].nr;
2134 s->ntot += blk->sub[2].nr;
2135 *ndu = blk->sub[2].nr;
2137 /* and copy the data*/
2138 for(j=0;j<blk->sub[2].nr;j++)
2140 if (blk->sub[2].type == xdr_datatype_float)
2142 s->du[startj+j] = blk->sub[2].fval[j];
2146 s->du[startj+j] = blk->sub[2].dval[j];
2149 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2151 *last_t = start_time + blk->sub[2].nr*delta_time;
2155 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2156 double start_time, double delta_time,
2157 double native_lambda, double temp,
2158 double *last_t, const char *filename)
2163 double foreign_lambda;
2167 /* check the block types etc. */
2168 if ( (blk->nsub < 2) ||
2169 (blk->sub[0].type != xdr_datatype_double) ||
2170 (blk->sub[1].type != xdr_datatype_large_int) ||
2171 (blk->sub[0].nr < 2) ||
2172 (blk->sub[1].nr < 2) )
2175 "Unexpected/corrupted block data in file %s around time %g",
2176 filename, start_time);
2187 "Unexpected/corrupted block data in file %s around time %g",
2188 filename, start_time);
2194 foreign_lambda=blk->sub[0].dval[0];
2195 derivative=(int)(blk->sub[1].lval[1]);
2197 foreign_lambda=native_lambda;
2199 samples_init(s, native_lambda, foreign_lambda, temp,
2200 derivative!=0, filename);
2203 for(i=0;i<nhist;i++)
2205 nbins[i] = blk->sub[i+2].nr;
2208 hist_init(s->hist, nhist, nbins);
2210 for(i=0;i<nhist;i++)
2212 s->hist->x0[i]=blk->sub[1].lval[2+i];
2213 s->hist->dx[i] = blk->sub[0].dval[1];
2215 s->hist->dx[i] = - s->hist->dx[i];
2218 s->hist->start_time = start_time;
2219 s->hist->delta_time = delta_time;
2220 s->start_time = start_time;
2221 s->delta_time = delta_time;
2223 for(i=0;i<nhist;i++)
2226 gmx_large_int_t sum=0;
2228 for(j=0;j<s->hist->nbin[i];j++)
2230 int binv=(int)(blk->sub[i+2].ival[j]);
2232 s->hist->bin[i][j] = binv;
2245 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2251 if (start_time + s->hist->sum*delta_time > *last_t)
2253 *last_t = start_time + s->hist->sum*delta_time;
2259 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2265 gmx_enxnm_t *enm=NULL;
2268 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2269 int *nhists=NULL; /* array to keep count & print at end */
2270 int *npts=NULL; /* array to keep count & print at end */
2271 double *lambdas=NULL; /* array to keep count & print at end */
2272 double native_lambda=-1;
2273 double end_time; /* the end time of the last batch of samples */
2276 fp = open_enx(fn,"r");
2277 do_enxnms(fp,&nre,&enm);
2280 while(do_enx(fp, fr))
2282 /* count the data blocks */
2287 /* DHCOLL block information: */
2288 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2291 /* count the blocks and handle collection information: */
2292 for(i=0;i<fr->nblock;i++)
2294 if (fr->block[i].id == enxDHHIST )
2296 if ( fr->block[i].id == enxDH )
2298 if (fr->block[i].id == enxDHCOLL )
2301 if ( (fr->block[i].nsub < 1) ||
2302 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2303 (fr->block[i].sub[0].nr < 5))
2305 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2308 /* read the data from the DHCOLL block */
2309 rtemp = fr->block[i].sub[0].dval[0];
2310 start_time = fr->block[i].sub[0].dval[1];
2311 delta_time = fr->block[i].sub[0].dval[2];
2312 start_lambda = fr->block[i].sub[0].dval[3];
2313 delta_lambda = fr->block[i].sub[0].dval[4];
2317 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2319 if ( ( *temp != rtemp) && (*temp > 0) )
2321 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2332 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2334 if (nblocks_raw > 0 && nblocks_hist > 0 )
2336 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2341 /* check the native lambda */
2342 if (!lambda_same(start_lambda, native_lambda) )
2344 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2345 fn, native_lambda, start_lambda, start_time);
2347 /* check the number of samples against the previous number */
2348 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2350 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2351 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2353 /* check whether last iterations's end time matches with
2354 the currrent start time */
2355 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2357 /* it didn't. We need to store our samples and reallocate */
2358 for(i=0;i<nsamples;i++)
2360 if (samples_rawdh[i])
2362 /* insert it into the existing list */
2363 lambda_list_insert_sample(lambda_head,
2365 /* and make sure we'll allocate a new one this time
2367 samples_rawdh[i]=NULL;
2374 /* this is the first round; allocate the associated data
2376 native_lambda=start_lambda;
2377 nsamples=nblocks_raw+nblocks_hist;
2378 snew(nhists, nsamples);
2379 snew(npts, nsamples);
2380 snew(lambdas, nsamples);
2381 snew(samples_rawdh, nsamples);
2382 for(i=0;i<nsamples;i++)
2387 samples_rawdh[i]=NULL; /* init to NULL so we know which
2388 ones contain values */
2393 k=0; /* counter for the lambdas, etc. arrays */
2394 for(i=0;i<fr->nblock;i++)
2396 if (fr->block[i].id == enxDH)
2399 read_edr_rawdh_block(&(samples_rawdh[k]),
2402 start_time, delta_time,
2403 start_lambda, rtemp,
2406 if (samples_rawdh[k])
2408 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2412 else if (fr->block[i].id == enxDHHIST)
2416 samples_t *s; /* this is where the data will go */
2417 s=read_edr_hist_block(&nb, &(fr->block[i]),
2418 start_time, delta_time,
2419 start_lambda, rtemp,
2424 lambdas[k]= s->foreign_lambda;
2427 /* and insert the new sample immediately */
2430 lambda_list_insert_sample(lambda_head, s+j);
2435 /* Now store all our extant sample collections */
2436 for(i=0;i<nsamples;i++)
2438 if (samples_rawdh[i])
2440 /* insert it into the existing list */
2441 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2446 fprintf(stderr, "\n");
2447 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2448 fn, first_t, last_t, native_lambda);
2449 for(i=0;i<nsamples;i++)
2453 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2457 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2467 int gmx_bar(int argc,char *argv[])
2469 static const char *desc[] = {
2470 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2471 "Bennett's acceptance ratio method (BAR). It also automatically",
2472 "adds series of individual free energies obtained with BAR into",
2473 "a combined free energy estimate.[PAR]",
2475 "Every individual BAR free energy difference relies on two ",
2476 "simulations at different states: say state A and state B, as",
2477 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
2478 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2479 "average of the Hamiltonian difference of state B given state A and",
2481 "The energy differences to the other state must be calculated",
2482 "explicitly during the simulation. This can be done with",
2483 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
2485 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
2486 "Two types of input files are supported:[BR]",
2487 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
2488 "The files should have columns ",
2489 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
2490 "The [GRK]lambda[grk] values are inferred ",
2491 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
2492 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
2493 "legends of Delta H",
2495 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
2496 "[TT]-extp[tt] option for these files, it is assumed",
2497 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
2498 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
2499 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
2500 "subtitle (if present), otherwise from a number in the subdirectory ",
2501 "in the file name.[PAR]",
2503 "The [GRK]lambda[grk] of the simulation is parsed from ",
2504 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
2505 "foreign [GRK]lambda[grk] values from the legend containing the ",
2506 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
2507 "the legend line containing 'T ='.[PAR]",
2509 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
2510 "These can contain either lists of energy differences (see the ",
2511 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
2512 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
2513 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
2514 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
2516 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
2517 "the energy difference can also be extrapolated from the ",
2518 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
2519 "option, which assumes that the system's Hamiltonian depends linearly",
2520 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
2522 "The free energy estimates are determined using BAR with bisection, ",
2523 "with the precision of the output set with [TT]-prec[tt]. ",
2524 "An error estimate taking into account time correlations ",
2525 "is made by splitting the data into blocks and determining ",
2526 "the free energy differences over those blocks and assuming ",
2527 "the blocks are independent. ",
2528 "The final error estimate is determined from the average variance ",
2529 "over 5 blocks. A range of block numbers for error estimation can ",
2530 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2532 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
2533 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
2534 "samples. [BB]Note[bb] that when aggregating energy ",
2535 "differences/derivatives with different sampling intervals, this is ",
2536 "almost certainly not correct. Usually subsequent energies are ",
2537 "correlated and different time intervals mean different degrees ",
2538 "of correlation between samples.[PAR]",
2540 "The results are split in two parts: the last part contains the final ",
2541 "results in kJ/mol, together with the error estimate for each part ",
2542 "and the total. The first part contains detailed free energy ",
2543 "difference estimates and phase space overlap measures in units of ",
2544 "kT (together with their computed error estimate). The printed ",
2546 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
2547 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
2548 "[TT]*[tt] DG: the free energy estimate.[BR]",
2549 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2550 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2551 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2553 "The relative entropy of both states in each other's ensemble can be ",
2554 "interpreted as a measure of phase space overlap: ",
2555 "the relative entropy s_A of the work samples of lambda_B in the ",
2556 "ensemble of lambda_A (and vice versa for s_B), is a ",
2557 "measure of the 'distance' between Boltzmann distributions of ",
2558 "the two states, that goes to zero for identical distributions. See ",
2559 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
2561 "The estimate of the expected per-sample standard deviation, as given ",
2562 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2563 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2564 "of the actual statistical error, because it assumes independent samples).[PAR]",
2566 "To get a visual estimate of the phase space overlap, use the ",
2567 "[TT]-oh[tt] option to write series of histograms, together with the ",
2568 "[TT]-nbin[tt] option.[PAR]"
2570 static real begin=0,end=-1,temp=-1;
2571 int nd=2,nbmin=5,nbmax=5;
2573 gmx_bool use_dhdl=FALSE;
2574 gmx_bool calc_s,calc_v;
2576 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2577 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2578 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2579 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2580 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2581 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2582 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
2583 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
2587 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2588 { efEDR, "-g", "ener", ffOPTRDMULT },
2589 { efXVG, "-o", "bar", ffOPTWR },
2590 { efXVG, "-oi", "barint", ffOPTWR },
2591 { efXVG, "-oh", "histogram", ffOPTWR }
2593 #define NFILE asize(fnm)
2596 int nf=0; /* file counter */
2598 int nfile_tot; /* total number of input files */
2603 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2604 lambda_t lambda_head; /* the head element */
2605 barres_t *results; /* the results */
2606 int nresults; /* number of results in results array */
2609 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2612 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2613 char ktformat[STRLEN], sktformat[STRLEN];
2614 char kteformat[STRLEN], skteformat[STRLEN];
2617 gmx_bool result_OK=TRUE,bEE=TRUE;
2619 gmx_bool disc_err=FALSE;
2620 double sum_disc_err=0.; /* discretization error */
2621 gmx_bool histrange_err=FALSE;
2622 double sum_histrange_err=0.; /* histogram range error */
2623 double stat_err=0.; /* statistical error */
2625 CopyRight(stderr,argv[0]);
2626 parse_common_args(&argc,argv,
2628 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2630 if (opt2bSet("-f", NFILE, fnm))
2632 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2634 if (opt2bSet("-g", NFILE, fnm))
2636 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2639 /* make linked list */
2641 lambda_init(lb, 0, 0);
2646 nfile_tot = nxvgfile + nedrfile;
2650 gmx_fatal(FARGS,"No input files!");
2655 gmx_fatal(FARGS,"Can not have negative number of digits");
2659 snew(partsum,(nbmax+1)*(nbmax+1));
2662 /* read in all files. First xvg files */
2663 for(f=0; f<nxvgfile; f++)
2665 read_bar_xvg(fxvgnms[f],&temp,lb);
2668 /* then .edr files */
2669 for(f=0; f<nedrfile; f++)
2671 read_barsim_edr(fedrnms[f],&temp,lb);;
2675 /* fix the times to allow for equilibration */
2676 lambdas_impose_times(lb, begin, end);
2678 if (opt2bSet("-oh",NFILE,fnm))
2680 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2683 /* assemble the output structures from the lambdas */
2684 results=barres_list_create(lb, &nresults, use_dhdl);
2686 sum_disc_err=barres_list_max_disc_err(results, nresults);
2690 printf("\nNo results to calculate.\n");
2694 if (sum_disc_err > prec)
2697 nd = ceil(-log10(prec));
2698 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2702 sprintf(lamformat,"%%6.3f");
2703 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2704 /* the format strings of the results in kT */
2705 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2706 sprintf( sktformat,"%%%ds",6+nd);
2707 /* the format strings of the errors in kT */
2708 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2709 sprintf( skteformat,"%%%ds",4+nd);
2710 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2711 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2716 if (opt2bSet("-o",NFILE,fnm))
2718 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2719 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2720 "\\lambda",buf,exvggtXYDY,oenv);
2724 if (opt2bSet("-oi",NFILE,fnm))
2726 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2727 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2728 "\\lambda",buf,oenv);
2736 /* first calculate results */
2739 for(f=0; f<nresults; f++)
2741 /* Determine the free energy difference with a factor of 10
2742 * more accuracy than requested for printing.
2744 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2747 if (results[f].dg_disc_err > prec/10.)
2749 if (results[f].dg_histrange_err > prec/10.)
2753 /* print results in kT */
2757 printf("\nTemperature: %g K\n", temp);
2759 printf("\nDetailed results in kT (see help for explanation):\n\n");
2760 printf("%6s ", " lam_A");
2761 printf("%6s ", " lam_B");
2762 printf(sktformat, "DG ");
2764 printf(skteformat, "+/- ");
2766 printf(skteformat, "disc ");
2768 printf(skteformat, "range ");
2769 printf(sktformat, "s_A ");
2771 printf(skteformat, "+/- " );
2772 printf(sktformat, "s_B ");
2774 printf(skteformat, "+/- " );
2775 printf(sktformat, "stdev ");
2777 printf(skteformat, "+/- ");
2779 for(f=0; f<nresults; f++)
2781 printf(lamformat, results[f].a->native_lambda);
2783 printf(lamformat, results[f].b->native_lambda);
2785 printf(ktformat, results[f].dg);
2789 printf(kteformat, results[f].dg_err);
2794 printf(kteformat, results[f].dg_disc_err);
2799 printf(kteformat, results[f].dg_histrange_err);
2802 printf(ktformat, results[f].sa);
2806 printf(kteformat, results[f].sa_err);
2809 printf(ktformat, results[f].sb);
2813 printf(kteformat, results[f].sb_err);
2816 printf(ktformat, results[f].dg_stddev);
2820 printf(kteformat, results[f].dg_stddev_err);
2824 /* Check for negative relative entropy with a 95% certainty. */
2825 if (results[f].sa < -2*results[f].sa_err ||
2826 results[f].sb < -2*results[f].sb_err)
2834 printf("\nWARNING: Some of these results violate the Second Law of "
2835 "Thermodynamics: \n"
2836 " This is can be the result of severe undersampling, or "
2838 " there is something wrong with the simulations.\n");
2842 /* final results in kJ/mol */
2843 printf("\n\nFinal results in kJ/mol:\n\n");
2845 for(f=0; f<nresults; f++)
2850 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2856 fprintf(fpb, xvg3format,
2857 0.5*(results[f].a->native_lambda +
2858 results[f].b->native_lambda),
2859 results[f].dg,results[f].dg_err);
2862 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2863 results[f].lambda_b);*/
2865 printf(lamformat, results[f].a->native_lambda);
2867 printf(lamformat, results[f].b->native_lambda);
2870 printf(dgformat,results[f].dg*kT);
2874 printf(dgformat,results[f].dg_err*kT);
2878 printf(" (max. range err. = ");
2879 printf(dgformat,results[f].dg_histrange_err*kT);
2881 sum_histrange_err += results[f].dg_histrange_err*kT;
2885 dg_tot += results[f].dg;
2889 printf(lamformat, results[0].a->native_lambda);
2891 printf(lamformat, results[nresults-1].b->native_lambda);
2894 printf(dgformat,dg_tot*kT);
2897 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2899 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2904 printf("\nmaximum discretization error = ");
2905 printf(dgformat,sum_disc_err);
2906 if (bEE && stat_err < sum_disc_err)
2908 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2913 printf("\nmaximum histogram range error = ");
2914 printf(dgformat,sum_histrange_err);
2915 if (bEE && stat_err < sum_histrange_err)
2917 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2926 fprintf(fpi, xvg2format,
2927 results[nresults-1].b->native_lambda, dg_tot);
2931 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2932 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");