2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
41 #include "gmx_header_config.h"
57 #include "gmx_fatal.h"
62 /* Suppress Cygwin compiler warnings from using newlib version of
68 /* the dhdl.xvg data from a simulation (actually obsolete, but still
69 here for reading the dhdl.xvg file*/
73 int ftp; /* file type */
74 int nset; /* number of lambdas, including dhdl */
75 int *np; /* number of data points (du or hists) per lambda */
76 int np_alloc; /* number of points (du or hists) allocated */
77 double temp; /* temperature */
78 double *lambda; /* the lambdas (of first index for y). */
79 double *t; /* the times (of second index for y) */
80 double **y; /* the dU values. y[0] holds the derivative, while
81 further ones contain the energy differences between
82 the native lambda and the 'foreign' lambdas. */
84 double native_lambda; /* the native lambda */
86 struct xvg_t *next, *prev; /*location in the global linked list of xvg_ts*/
93 unsigned int *bin[2]; /* the (forward + reverse) histogram values */
94 double dx[2]; /* the histogram spacing. The reverse
95 dx is the negative of the forward dx.*/
96 gmx_large_int_t x0[2]; /* the (forward + reverse) histogram start
99 int nbin[2]; /* the (forward+reverse) number of bins */
100 gmx_large_int_t sum; /* the total number of counts. Must be
101 the same for forward + reverse. */
102 int nhist; /* number of hist datas (forward or reverse) */
104 double start_time, delta_time; /* start time, end time of histogram */
108 /* an aggregate of samples for partial free energy calculation */
109 typedef struct samples_t
111 double native_lambda;
112 double foreign_lambda;
113 double temp; /* the temperature */
114 gmx_bool derivative; /* whether this sample is a derivative */
116 /* The samples come either as either delta U lists: */
117 int ndu; /* the number of delta U samples */
118 double *du; /* the delta u's */
119 double *t; /* the times associated with those samples, or: */
120 double start_time,delta_time;/*start time and delta time for linear time*/
122 /* or as histograms: */
123 hist_t *hist; /* a histogram */
125 /* allocation data: (not NULL for data 'owned' by this struct) */
126 double *du_alloc, *t_alloc; /* allocated delta u arrays */
127 size_t ndu_alloc, nt_alloc; /* pre-allocated sizes */
128 hist_t *hist_alloc; /* allocated hist */
130 gmx_large_int_t ntot; /* total number of samples */
131 const char *filename; /* the file name this sample comes from */
134 /* a sample range (start to end for du-style data, or boolean
135 for both du-style data and histograms */
136 typedef struct sample_range_t
138 int start, end; /* start and end index for du style data */
139 gmx_bool use; /* whether to use this sample */
141 samples_t *s; /* the samples this range belongs to */
145 /* a collection of samples for a partial free energy calculation
146 (i.e. the collection of samples from one native lambda to one
148 typedef struct sample_coll_t
150 double native_lambda; /* these should be the same for all samples in the histogram?*/
151 double foreign_lambda; /* collection */
152 double temp; /* the temperature */
154 int nsamples; /* the number of samples */
155 samples_t **s; /* the samples themselves */
156 sample_range_t *r; /* the sample ranges */
157 int nsamples_alloc; /* number of allocated samples */
159 gmx_large_int_t ntot; /* total number of samples in the ranges of
162 struct sample_coll_t *next, *prev; /* next and previous in the list */
165 /* all the samples associated with a lambda point */
166 typedef struct lambda_t
168 double lambda; /* the native lambda (at start time if dynamic) */
169 double temp; /* temperature */
171 sample_coll_t *sc; /* the samples */
173 sample_coll_t sc_head; /*the pre-allocated list head for the linked list.*/
175 struct lambda_t *next, *prev; /* the next and prev in the list */
179 /* calculated values. */
181 sample_coll_t *a, *b; /* the simulation data */
183 double dg; /* the free energy difference */
184 double dg_err; /* the free energy difference */
186 double dg_disc_err; /* discretization error */
187 double dg_histrange_err; /* histogram range error */
189 double sa; /* relative entropy of b in state a */
190 double sa_err; /* error in sa */
191 double sb; /* relative entropy of a in state b */
192 double sb_err; /* error in sb */
194 double dg_stddev; /* expected dg stddev per sample */
195 double dg_stddev_err; /* error in dg_stddev */
201 static void hist_init(hist_t *h, int nhist, int *nbin)
206 gmx_fatal(FARGS, "histogram with more than two sets of data!");
210 snew(h->bin[i], nbin[i]);
213 h->start_time=h->delta_time=0;
220 static void hist_destroy(hist_t *h)
226 static void xvg_init(xvg_t *ba)
235 static void samples_init(samples_t *s, double native_lambda,
236 double foreign_lambda, double temp,
237 gmx_bool derivative, const char *filename)
239 s->native_lambda=native_lambda;
240 s->foreign_lambda=foreign_lambda;
242 s->derivative=derivative;
247 s->start_time = s->delta_time = 0;
256 s->filename=filename;
259 static void sample_range_init(sample_range_t *r, samples_t *s)
267 static void sample_coll_init(sample_coll_t *sc, double native_lambda,
268 double foreign_lambda, double temp)
270 sc->native_lambda = native_lambda;
271 sc->foreign_lambda = foreign_lambda;
277 sc->nsamples_alloc=0;
280 sc->next=sc->prev=NULL;
283 static void sample_coll_destroy(sample_coll_t *sc)
285 /* don't free the samples themselves */
291 static void lambda_init(lambda_t *l, double native_lambda, double temp)
293 l->lambda=native_lambda;
301 sample_coll_init(l->sc, native_lambda, 0., 0.);
306 static void barres_init(barres_t *br)
324 static gmx_bool lambda_same(double lambda1, double lambda2)
326 return gmx_within_tol(lambda1, lambda2, 10*GMX_REAL_EPS);
329 /* calculate the total number of samples in a sample collection */
330 static void sample_coll_calc_ntot(sample_coll_t *sc)
335 for(i=0;i<sc->nsamples;i++)
341 sc->ntot += sc->s[i]->ntot;
345 sc->ntot += sc->r[i].end - sc->r[i].start;
352 /* find the barsamples_t associated with a lambda that corresponds to
353 a specific foreign lambda */
354 static sample_coll_t *lambda_find_sample_coll(lambda_t *l,
355 double foreign_lambda)
357 sample_coll_t *sc=l->sc->next;
361 if (lambda_same(sc->foreign_lambda,foreign_lambda))
371 /* insert li into an ordered list of lambda_colls */
372 static void lambda_insert_sample_coll(lambda_t *l, sample_coll_t *sc)
374 sample_coll_t *scn=l->sc->next;
375 while ( (scn!=l->sc) )
377 if (scn->foreign_lambda > sc->foreign_lambda)
381 /* now insert it before the found scn */
388 /* insert li into an ordered list of lambdas */
389 static void lambda_insert_lambda(lambda_t *head, lambda_t *li)
391 lambda_t *lc=head->next;
394 if (lc->lambda > li->lambda)
398 /* now insert ourselves before the found lc */
405 /* insert a sample and a sample_range into a sample_coll. The
406 samples are stored as a pointer, the range is copied. */
407 static void sample_coll_insert_sample(sample_coll_t *sc, samples_t *s,
410 /* first check if it belongs here */
411 if (sc->temp != s->temp)
413 gmx_fatal(FARGS, "Temperatures in files %s and %s are not the same!",
414 s->filename, sc->next->s[0]->filename);
416 if (sc->native_lambda != s->native_lambda)
418 gmx_fatal(FARGS, "Native lambda in files %s and %s are not the same (and they should be)!",
419 s->filename, sc->next->s[0]->filename);
421 if (sc->foreign_lambda != s->foreign_lambda)
423 gmx_fatal(FARGS, "Foreign lambda in files %s and %s are not the same (and they should be)!",
424 s->filename, sc->next->s[0]->filename);
427 /* check if there's room */
428 if ( (sc->nsamples + 1) > sc->nsamples_alloc)
430 sc->nsamples_alloc = max(2*sc->nsamples_alloc, 2);
431 srenew(sc->s, sc->nsamples_alloc);
432 srenew(sc->r, sc->nsamples_alloc);
434 sc->s[sc->nsamples]=s;
435 sc->r[sc->nsamples]=*r;
438 sample_coll_calc_ntot(sc);
441 /* insert a sample into a lambda_list, creating the right sample_coll if
443 static void lambda_list_insert_sample(lambda_t *head, samples_t *s)
445 gmx_bool found=FALSE;
449 lambda_t *l=head->next;
451 /* first search for the right lambda_t */
454 if (lambda_same(l->lambda, s->native_lambda) )
464 snew(l, 1); /* allocate a new one */
465 lambda_init(l, s->native_lambda, s->temp); /* initialize it */
466 lambda_insert_lambda(head, l); /* add it to the list */
469 /* now look for a sample collection */
470 sc=lambda_find_sample_coll(l, s->foreign_lambda);
473 snew(sc, 1); /* allocate a new one */
474 sample_coll_init(sc, s->native_lambda, s->foreign_lambda, s->temp);
475 lambda_insert_sample_coll(l, sc);
478 /* now insert the samples into the sample coll */
479 sample_range_init(&r, s);
480 sample_coll_insert_sample(sc, s, &r);
484 /* make a histogram out of a sample collection */
485 static void sample_coll_make_hist(sample_coll_t *sc, int **bin,
486 int *nbin_alloc, int *nbin,
487 double *dx, double *xmin, int nbin_default)
490 gmx_bool dx_set=FALSE;
491 gmx_bool xmin_set=FALSE;
493 gmx_bool xmax_set=FALSE;
494 gmx_bool xmax_set_hard=FALSE; /* whether the xmax is bounded by the
495 limits of a histogram */
498 /* first determine dx and xmin; try the histograms */
499 for(i=0;i<sc->nsamples;i++)
503 hist_t *hist=sc->s[i]->hist;
504 for(k=0;k<hist->nhist;k++)
506 double hdx=hist->dx[k];
507 double xmax_now=(hist->x0[k]+hist->nbin[k])*hdx;
509 /* we use the biggest dx*/
510 if ( (!dx_set) || hist->dx[0] > *dx)
515 if ( (!xmin_set) || (hist->x0[k]*hdx) < *xmin)
518 *xmin = (hist->x0[k]*hdx);
521 if ( (!xmax_set) || (xmax_now>xmax && !xmax_set_hard) )
525 if (hist->bin[k][hist->nbin[k]-1] != 0)
528 if ( hist->bin[k][hist->nbin[k]-1]!=0 && (xmax_now < xmax) )
536 /* and the delta us */
537 for(i=0;i<sc->nsamples;i++)
539 if (sc->s[i]->ndu > 0)
541 /* determine min and max */
542 int starti=sc->r[i].start;
543 int endi=sc->r[i].end;
544 double du_xmin=sc->s[i]->du[starti];
545 double du_xmax=sc->s[i]->du[starti];
546 for(j=starti+1;j<endi;j++)
548 if (sc->s[i]->du[j] < du_xmin)
549 du_xmin = sc->s[i]->du[j];
550 if (sc->s[i]->du[j] > du_xmax)
551 du_xmax = sc->s[i]->du[j];
554 /* and now change the limits */
555 if ( (!xmin_set) || (du_xmin < *xmin) )
560 if ( (!xmax_set) || ((du_xmax > xmax) && !xmax_set_hard) )
568 if (!xmax_set || !xmin_set)
578 *dx=(xmax-(*xmin))/((*nbin)-2); /* -2 because we want the last bin to
579 be 0, and we count from 0 */
583 *nbin=(xmax-(*xmin))/(*dx);
586 if (*nbin > *nbin_alloc)
589 srenew(*bin, *nbin_alloc);
592 /* reset the histogram */
593 for(i=0;i<(*nbin);i++)
598 /* now add the actual data */
599 for(i=0;i<sc->nsamples;i++)
603 hist_t *hist=sc->s[i]->hist;
604 for(k=0;k<hist->nhist;k++)
606 double hdx = hist->dx[k];
607 double xmin_hist=hist->x0[k]*hdx;
608 for(j=0;j<hist->nbin[k];j++)
610 /* calculate the bin corresponding to the middle of the
612 double x=hdx*(j+0.5) + xmin_hist;
613 int binnr=(int)((x-(*xmin))/(*dx));
615 if (binnr >= *nbin || binnr<0)
618 (*bin)[binnr] += hist->bin[k][j];
624 int starti=sc->r[i].start;
625 int endi=sc->r[i].end;
626 for(j=starti;j<endi;j++)
628 int binnr=(int)((sc->s[i]->du[j]-(*xmin))/(*dx));
629 if (binnr >= *nbin || binnr<0)
638 /* write a collection of histograms to a file */
639 void lambdas_histogram(lambda_t *bl_head, const char *filename,
640 int nbin_default, const output_env_t oenv)
642 char label_x[STRLEN];
643 const char *dhdl="dH/d\\lambda",*deltag="\\DeltaH",*lambda="\\lambda";
644 const char *title="N(\\DeltaH)";
645 const char *label_y="Samples";
649 char **setnames=NULL;
650 gmx_bool first_set=FALSE;
651 /* histogram data: */
659 printf("\nWriting histogram to %s\n", filename);
660 sprintf(label_x, "\\DeltaH (%s)", unit_energy);
662 fp=xvgropen_type(filename, title, label_x, label_y, exvggtXNY, oenv);
664 /* first get all the set names */
666 /* iterate over all lambdas */
669 sample_coll_t *sc=bl->sc->next;
671 /* iterate over all samples */
675 srenew(setnames, nsets);
676 snew(setnames[nsets-1], STRLEN);
677 if (!lambda_same(sc->foreign_lambda, sc->native_lambda))
679 sprintf(setnames[nsets-1], "N(%s(%s=%g) | %s=%g)",
680 deltag, lambda, sc->foreign_lambda, lambda,
685 sprintf(setnames[nsets-1], "N(%s | %s=%g)",
686 dhdl, lambda, sc->native_lambda);
693 xvgr_legend(fp, nsets, (const char**)setnames, oenv);
696 /* now make the histograms */
698 /* iterate over all lambdas */
701 sample_coll_t *sc=bl->sc->next;
703 /* iterate over all samples */
708 xvgr_new_dataset(fp, 0, 0, NULL, oenv);
711 sample_coll_make_hist(sc, &hist, &nbin_alloc, &nbin, &dx, &min,
716 double xmin=i*dx + min;
717 double xmax=(i+1)*dx + min;
719 fprintf(fp, "%g %d\n%g %d\n", xmin, hist[i], xmax, hist[i]);
735 /* create a collection (array) of barres_t object given a ordered linked list
736 of barlamda_t sample collections */
737 static barres_t *barres_list_create(lambda_t *bl_head, int *nres,
747 /* first count the lambdas */
754 snew(res, nlambda-1);
756 /* next put the right samples in the res */
758 bl=bl_head->next->next; /* we start with the second one. */
761 sample_coll_t *sc, *scprev;
762 barres_t *br=&(res[*nres]);
763 /* there is always a previous one. we search for that as a foreign
765 scprev=lambda_find_sample_coll(bl->prev, bl->lambda);
766 sc=lambda_find_sample_coll(bl, bl->prev->lambda);
774 scprev=lambda_find_sample_coll(bl->prev, bl->prev->lambda);
775 sc=lambda_find_sample_coll(bl, bl->lambda);
779 printf("\nWARNING: Using the derivative data (dH/dlambda) to extrapolate delta H values.\nThis will only work if the Hamiltonian is linear in lambda.\n");
784 gmx_fatal(FARGS,"Some dhdl files contain only one value (dH/dl), while others \ncontain multiple values (dH/dl and/or Delta H), will not proceed \nbecause of possible inconsistencies.\n");
787 else if (!scprev && !sc)
789 gmx_fatal(FARGS,"There is no path from lambda=%g -> %g that is covered by foreign lambdas:\ncannot proceed with BAR.\nUse thermodynamic integration of dH/dl by calculating the averages of dH/dl\nwith g_analyze and integrating them.\nAlternatively, use the -extp option if (and only if) the Hamiltonian\ndepends linearly on lambda, which is NOT normally the case.\n", bl->prev->lambda, bl->lambda);
795 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->lambda,bl->prev->lambda);
799 gmx_fatal(FARGS,"Could not find a set for foreign lambda = %g\nin the files for lambda = %g",bl->prev->lambda,bl->lambda);
811 /* estimate the maximum discretization error */
812 static double barres_list_max_disc_err(barres_t *res, int nres)
820 barres_t *br=&(res[i]);
822 delta_lambda=fabs(br->b->native_lambda-br->a->native_lambda);
824 for(j=0;j<br->a->nsamples;j++)
826 if (br->a->s[j]->hist)
829 if (br->a->s[j]->derivative)
832 disc_err=max(disc_err, Wfac*br->a->s[j]->hist->dx[0]);
835 for(j=0;j<br->b->nsamples;j++)
837 if (br->b->s[j]->hist)
840 if (br->b->s[j]->derivative)
842 disc_err=max(disc_err, Wfac*br->b->s[j]->hist->dx[0]);
850 /* impose start and end times on a sample collection, updating sample_ranges */
851 static void sample_coll_impose_times(sample_coll_t *sc, double begin_t,
855 for(i=0;i<sc->nsamples;i++)
857 samples_t *s=sc->s[i];
858 sample_range_t *r=&(sc->r[i]);
861 double end_time=s->hist->delta_time*s->hist->sum +
863 if (s->hist->start_time < begin_t || end_time > end_t)
873 if (s->start_time < begin_t)
875 r->start=(int)((begin_t - s->start_time)/s->delta_time);
877 end_time=s->delta_time*s->ndu + s->start_time;
878 if (end_time > end_t)
880 r->end=(int)((end_t - s->start_time)/s->delta_time);
886 for(j=0;j<s->ndu;j++)
888 if (s->t[j] <begin_t)
893 if (s->t[j] >= end_t)
900 if (r->start > r->end)
906 sample_coll_calc_ntot(sc);
909 static void lambdas_impose_times(lambda_t *head, double begin, double end)
911 double first_t, last_t;
912 double begin_t, end_t;
916 if (begin<=0 && end<0)
921 /* first determine the global start and end times */
927 sample_coll_t *sc=lc->sc->next;
930 for(j=0;j<sc->nsamples;j++)
932 double start_t,end_t;
934 start_t = sc->s[j]->start_time;
935 end_t = sc->s[j]->start_time;
938 end_t += sc->s[j]->delta_time*sc->s[j]->hist->sum;
944 end_t = sc->s[j]->t[sc->s[j]->ndu-1];
948 end_t += sc->s[j]->delta_time*sc->s[j]->ndu;
952 if (start_t < first_t || first_t<0)
966 /* calculate the actual times */
984 printf("\n Samples in time interval: %.3f - %.3f\n", first_t, last_t);
990 printf("Removing samples outside of: %.3f - %.3f\n", begin_t, end_t);
992 /* then impose them */
996 sample_coll_t *sc=lc->sc->next;
999 sample_coll_impose_times(sc, begin_t, end_t);
1007 /* create subsample i out of ni from an existing sample_coll */
1008 static gmx_bool sample_coll_create_subsample(sample_coll_t *sc,
1009 sample_coll_t *sc_orig,
1013 int hist_start, hist_end;
1015 gmx_large_int_t ntot_start;
1016 gmx_large_int_t ntot_end;
1017 gmx_large_int_t ntot_so_far;
1019 *sc = *sc_orig; /* just copy all fields */
1021 /* allocate proprietary memory */
1022 snew(sc->s, sc_orig->nsamples);
1023 snew(sc->r, sc_orig->nsamples);
1025 /* copy the samples */
1026 for(j=0;j<sc_orig->nsamples;j++)
1028 sc->s[j] = sc_orig->s[j];
1029 sc->r[j] = sc_orig->r[j]; /* copy the ranges too */
1032 /* now fix start and end fields */
1033 /* the casts avoid possible overflows */
1034 ntot_start=(gmx_large_int_t)(sc_orig->ntot*(double)i/(double)ni);
1035 ntot_end =(gmx_large_int_t)(sc_orig->ntot*(double)(i+1)/(double)ni);
1037 for(j=0;j<sc->nsamples;j++)
1039 gmx_large_int_t ntot_add;
1040 gmx_large_int_t new_start, new_end;
1046 ntot_add = sc->s[j]->hist->sum;
1050 ntot_add = sc->r[j].end - sc->r[j].start;
1058 if (!sc->s[j]->hist)
1060 if (ntot_so_far < ntot_start)
1062 /* adjust starting point */
1063 new_start = sc->r[j].start + (ntot_start - ntot_so_far);
1067 new_start = sc->r[j].start;
1069 /* adjust end point */
1070 new_end = sc->r[j].start + (ntot_end - ntot_so_far);
1071 if (new_end > sc->r[j].end)
1073 new_end=sc->r[j].end;
1076 /* check if we're in range at all */
1077 if ( (new_end < new_start) || (new_start > sc->r[j].end) )
1082 /* and write the new range */
1083 sc->r[j].start=(int)new_start;
1084 sc->r[j].end=(int)new_end;
1091 double ntot_start_norm, ntot_end_norm;
1092 /* calculate the amount of overlap of the
1093 desired range (ntot_start -- ntot_end) onto
1094 the histogram range (ntot_so_far -- ntot_so_far+ntot_add)*/
1096 /* first calculate normalized bounds
1097 (where 0 is the start of the hist range, and 1 the end) */
1098 ntot_start_norm = (ntot_start-ntot_so_far)/(double)ntot_add;
1099 ntot_end_norm = (ntot_end-ntot_so_far)/(double)ntot_add;
1101 /* now fix the boundaries */
1102 ntot_start_norm = min(1, max(0., ntot_start_norm));
1103 ntot_end_norm = max(0, min(1., ntot_end_norm));
1105 /* and calculate the overlap */
1106 overlap = ntot_end_norm - ntot_start_norm;
1108 if (overlap > 0.95) /* we allow for 5% slack */
1110 sc->r[j].use = TRUE;
1112 else if (overlap < 0.05)
1114 sc->r[j].use = FALSE;
1122 ntot_so_far += ntot_add;
1124 sample_coll_calc_ntot(sc);
1129 /* calculate minimum and maximum work values in sample collection */
1130 static void sample_coll_min_max(sample_coll_t *sc, double Wfac,
1131 double *Wmin, double *Wmax)
1138 for(i=0;i<sc->nsamples;i++)
1140 samples_t *s=sc->s[i];
1141 sample_range_t *r=&(sc->r[i]);
1146 for(j=r->start; j<r->end; j++)
1148 *Wmin = min(*Wmin,s->du[j]*Wfac);
1149 *Wmax = max(*Wmax,s->du[j]*Wfac);
1154 int hd=0; /* determine the histogram direction: */
1156 if ( (s->hist->nhist>1) && (Wfac<0) )
1162 for(j=s->hist->nbin[hd]-1;j>=0;j--)
1164 *Wmin=min(*Wmin,Wfac*(s->hist->x0[hd])*dx);
1165 *Wmax=max(*Wmax,Wfac*(s->hist->x0[hd])*dx);
1166 /* look for the highest value bin with values */
1167 if (s->hist->bin[hd][j]>0)
1169 *Wmin=min(*Wmin,Wfac*(j+s->hist->x0[hd]+1)*dx);
1170 *Wmax=max(*Wmax,Wfac*(j+s->hist->x0[hd]+1)*dx);
1180 static double calc_bar_sum(int n,const double *W,double Wfac,double sbMmDG)
1189 sum += 1./(1. + exp(Wfac*W[i] + sbMmDG));
1195 /* calculate the BAR average given a histogram
1197 if type== 0, calculate the best estimate for the average,
1198 if type==-1, calculate the minimum possible value given the histogram
1199 if type== 1, calculate the maximum possible value given the histogram */
1200 static double calc_bar_sum_hist(const hist_t *hist, double Wfac, double sbMmDG,
1206 /* normalization factor multiplied with bin width and
1207 number of samples (we normalize through M): */
1209 int hd=0; /* determine the histogram direction: */
1212 if ( (hist->nhist>1) && (Wfac<0) )
1217 max=hist->nbin[hd]-1;
1220 max=hist->nbin[hd]; /* we also add whatever was out of range */
1225 double x=Wfac*((i+hist->x0[hd])+0.5)*dx; /* bin middle */
1226 double pxdx=hist->bin[0][i]*normdx; /* p(x)dx */
1228 sum += pxdx/(1. + exp(x + sbMmDG));
1234 static double calc_bar_lowlevel(sample_coll_t *ca, sample_coll_t *cb,
1235 double temp, double tol, int type)
1240 double Wfac1,Wfac2,Wmin,Wmax;
1241 double DG0,DG1,DG2,dDG1;
1243 double n1, n2; /* numbers of samples as doubles */
1248 /* count the numbers of samples */
1254 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1256 /* this is the case when the delta U were calculated directly
1257 (i.e. we're not scaling dhdl) */
1263 /* we're using dhdl, so delta_lambda needs to be a
1264 multiplication factor. */
1265 double delta_lambda=cb->native_lambda-ca->native_lambda;
1266 Wfac1 = beta*delta_lambda;
1267 Wfac2 = -beta*delta_lambda;
1272 /* We print the output both in kT and kJ/mol.
1273 * Here we determine DG in kT, so when beta < 1
1274 * the precision has to be increased.
1279 /* Calculate minimum and maximum work to give an initial estimate of
1280 * delta G as their average.
1283 double Wmin1, Wmin2, Wmax1, Wmax2;
1284 sample_coll_min_max(ca, Wfac1, &Wmin1, &Wmax1);
1285 sample_coll_min_max(cb, Wfac2, &Wmin2, &Wmax2);
1287 Wmin=min(Wmin1, Wmin2);
1288 Wmax=max(Wmax1, Wmax2);
1296 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1298 /* We approximate by bisection: given our initial estimates
1299 we keep checking whether the halfway point is greater or
1300 smaller than what we get out of the BAR averages.
1302 For the comparison we can use twice the tolerance. */
1303 while (DG2 - DG0 > 2*tol)
1305 DG1 = 0.5*(DG0 + DG2);
1307 /*printf("Wfac1=%g, Wfac2=%g, beta=%g, DG1=%g\n",Wfac1,Wfac2,beta,
1310 /* calculate the BAR averages */
1313 for(i=0;i<ca->nsamples;i++)
1315 samples_t *s=ca->s[i];
1316 sample_range_t *r=&(ca->r[i]);
1321 dDG1 += calc_bar_sum_hist(s->hist, Wfac1, (M-DG1), type);
1325 dDG1 += calc_bar_sum(r->end - r->start, s->du + r->start,
1330 for(i=0;i<cb->nsamples;i++)
1332 samples_t *s=cb->s[i];
1333 sample_range_t *r=&(cb->r[i]);
1338 dDG1 -= calc_bar_sum_hist(s->hist, Wfac2, -(M-DG1), type);
1342 dDG1 -= calc_bar_sum(r->end - r->start, s->du + r->start,
1358 fprintf(debug,"DG %9.5f %9.5f\n",DG0,DG2);
1362 return 0.5*(DG0 + DG2);
1365 static void calc_rel_entropy(sample_coll_t *ca, sample_coll_t *cb,
1366 double temp, double dg, double *sa, double *sb)
1372 double Wfac1, Wfac2;
1378 /* count the numbers of samples */
1382 /* to ensure the work values are the same as during the delta_G */
1383 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1385 /* this is the case when the delta U were calculated directly
1386 (i.e. we're not scaling dhdl) */
1392 /* we're using dhdl, so delta_lambda needs to be a
1393 multiplication factor. */
1394 double delta_lambda=cb->native_lambda-ca->native_lambda;
1395 Wfac1 = beta*delta_lambda;
1396 Wfac2 = -beta*delta_lambda;
1399 /* first calculate the average work in both directions */
1400 for(i=0;i<ca->nsamples;i++)
1402 samples_t *s=ca->s[i];
1403 sample_range_t *r=&(ca->r[i]);
1408 for(j=r->start;j<r->end;j++)
1409 W_ab += Wfac1*s->du[j];
1413 /* normalization factor multiplied with bin width and
1414 number of samples (we normalize through M): */
1417 int hd=0; /* histogram direction */
1418 if ( (s->hist->nhist>1) && (Wfac1<0) )
1424 for(j=0;j<s->hist->nbin[0];j++)
1426 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1427 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1435 for(i=0;i<cb->nsamples;i++)
1437 samples_t *s=cb->s[i];
1438 sample_range_t *r=&(cb->r[i]);
1443 for(j=r->start;j<r->end;j++)
1444 W_ba += Wfac1*s->du[j];
1448 /* normalization factor multiplied with bin width and
1449 number of samples (we normalize through M): */
1452 int hd=0; /* histogram direction */
1453 if ( (s->hist->nhist>1) && (Wfac2<0) )
1459 for(j=0;j<s->hist->nbin[0];j++)
1461 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1462 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1470 /* then calculate the relative entropies */
1475 static void calc_dg_stddev(sample_coll_t *ca, sample_coll_t *cb,
1476 double temp, double dg, double *stddev)
1480 double sigmafact=0.;
1482 double Wfac1, Wfac2;
1488 /* count the numbers of samples */
1492 /* to ensure the work values are the same as during the delta_G */
1493 if (!lambda_same(ca->native_lambda, ca->foreign_lambda))
1495 /* this is the case when the delta U were calculated directly
1496 (i.e. we're not scaling dhdl) */
1502 /* we're using dhdl, so delta_lambda needs to be a
1503 multiplication factor. */
1504 double delta_lambda=cb->native_lambda-ca->native_lambda;
1505 Wfac1 = beta*delta_lambda;
1506 Wfac2 = -beta*delta_lambda;
1512 /* calculate average in both directions */
1513 for(i=0;i<ca->nsamples;i++)
1515 samples_t *s=ca->s[i];
1516 sample_range_t *r=&(ca->r[i]);
1521 for(j=r->start;j<r->end;j++)
1523 sigmafact += 1./(2. + 2.*cosh((M + Wfac1*s->du[j] - dg)));
1528 /* normalization factor multiplied with bin width and
1529 number of samples (we normalize through M): */
1532 int hd=0; /* histogram direction */
1533 if ( (s->hist->nhist>1) && (Wfac1<0) )
1539 for(j=0;j<s->hist->nbin[0];j++)
1541 double x=Wfac1*((j+s->hist->x0[0])+0.5)*dx; /*bin ctr*/
1542 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1544 sigmafact += pxdx/(2. + 2.*cosh((M + x - dg)));
1549 for(i=0;i<cb->nsamples;i++)
1551 samples_t *s=cb->s[i];
1552 sample_range_t *r=&(cb->r[i]);
1557 for(j=r->start;j<r->end;j++)
1559 sigmafact += 1./(2. + 2.*cosh((M - Wfac2*s->du[j] - dg)));
1564 /* normalization factor multiplied with bin width and
1565 number of samples (we normalize through M): */
1568 int hd=0; /* histogram direction */
1569 if ( (s->hist->nhist>1) && (Wfac2<0) )
1575 for(j=0;j<s->hist->nbin[0];j++)
1577 double x=Wfac2*((j+s->hist->x0[0])+0.5)*dx;/*bin ctr*/
1578 double pxdx=s->hist->bin[0][j]*normdx; /* p(x)dx */
1580 sigmafact += pxdx/(2. + 2.*cosh((M - x - dg)));
1586 sigmafact /= (n1 + n2);
1590 Shirts, Bair, Hooker & Pande, Phys. Rev. Lett 91, 140601 (2003): */
1591 *stddev = sqrt(((1./sigmafact) - ( (n1+n2)/n1 + (n1+n2)/n2 )));
1596 static void calc_bar(barres_t *br, double tol,
1597 int npee_min, int npee_max, gmx_bool *bEE,
1601 double dg_sig2,sa_sig2,sb_sig2,stddev_sig2; /* intermediate variance values
1602 for calculated quantities */
1603 int nsample1, nsample2;
1604 double temp=br->a->temp;
1606 double dg_min, dg_max;
1607 gmx_bool have_hist=FALSE;
1609 br->dg=calc_bar_lowlevel(br->a, br->b, temp, tol, 0);
1611 br->dg_disc_err = 0.;
1612 br->dg_histrange_err = 0.;
1614 /* check if there are histograms */
1615 for(i=0;i<br->a->nsamples;i++)
1617 if (br->a->r[i].use && br->a->s[i]->hist)
1625 for(i=0;i<br->b->nsamples;i++)
1627 if (br->b->r[i].use && br->b->s[i]->hist)
1635 /* calculate histogram-specific errors */
1638 dg_min=calc_bar_lowlevel(br->a, br->b, temp, tol, -1);
1639 dg_max=calc_bar_lowlevel(br->a, br->b, temp, tol, 1);
1641 if (fabs(dg_max - dg_min) > GMX_REAL_EPS*10)
1643 /* the histogram range error is the biggest of the differences
1644 between the best estimate and the extremes */
1645 br->dg_histrange_err = fabs(dg_max - dg_min);
1647 br->dg_disc_err = 0.;
1648 for(i=0;i<br->a->nsamples;i++)
1650 if (br->a->s[i]->hist)
1651 br->dg_disc_err=max(br->dg_disc_err, br->a->s[i]->hist->dx[0]);
1653 for(i=0;i<br->b->nsamples;i++)
1655 if (br->b->s[i]->hist)
1656 br->dg_disc_err=max(br->dg_disc_err, br->b->s[i]->hist->dx[0]);
1659 calc_rel_entropy(br->a, br->b, temp, br->dg, &(br->sa), &(br->sb));
1661 calc_dg_stddev(br->a, br->b, temp, br->dg, &(br->dg_stddev) );
1670 sample_coll_t ca, cb;
1672 /* initialize the samples */
1673 sample_coll_init(&ca, br->a->native_lambda, br->a->foreign_lambda,
1675 sample_coll_init(&cb, br->b->native_lambda, br->b->foreign_lambda,
1678 for(npee=npee_min; npee<=npee_max; npee++)
1687 double dstddev2 = 0;
1690 for(p=0; p<npee; p++)
1697 cac=sample_coll_create_subsample(&ca, br->a, p, npee);
1698 cbc=sample_coll_create_subsample(&cb, br->b, p, npee);
1702 printf("WARNING: histogram number incompatible with block number for averaging: can't do error estimate\n");
1705 sample_coll_destroy(&ca);
1707 sample_coll_destroy(&cb);
1711 dgp=calc_bar_lowlevel(&ca, &cb, temp, tol, 0);
1715 partsum[npee*(npee_max+1)+p] += dgp;
1717 calc_rel_entropy(&ca, &cb, temp, dgp, &sac, &sbc);
1722 calc_dg_stddev(&ca, &cb, temp, dgp, &stddevc );
1725 dstddev2 += stddevc*stddevc;
1727 sample_coll_destroy(&ca);
1728 sample_coll_destroy(&cb);
1732 dg_sig2 += (dgs2-dgs*dgs)/(npee-1);
1738 sa_sig2 += (dsa2-dsa*dsa)/(npee-1);
1739 sb_sig2 += (dsb2-dsb*dsb)/(npee-1);
1743 stddev_sig2 += (dstddev2-dstddev*dstddev)/(npee-1);
1745 br->dg_err = sqrt(dg_sig2/(npee_max - npee_min + 1));
1746 br->sa_err = sqrt(sa_sig2/(npee_max - npee_min + 1));
1747 br->sb_err = sqrt(sb_sig2/(npee_max - npee_min + 1));
1748 br->dg_stddev_err = sqrt(stddev_sig2/(npee_max - npee_min + 1));
1753 static double bar_err(int nbmin, int nbmax, const double *partsum)
1756 double svar,s,s2,dg;
1759 for(nb=nbmin; nb<=nbmax; nb++)
1765 dg = partsum[nb*(nbmax+1)+b];
1771 svar += (s2 - s*s)/(nb - 1);
1774 return sqrt(svar/(nbmax + 1 - nbmin));
1777 /* deduce lambda value from legend.
1779 bdhdl = if true, value may be a derivative.
1781 bdhdl = whether the legend was for a derivative.
1783 static double legend2lambda(char *fn,const char *legend,gmx_bool *bdhdl)
1791 gmx_fatal(FARGS,"There is no legend in file '%s', can not deduce lambda",fn);
1793 ptr = strrchr(legend,' ');
1795 if (strstr(legend,"dH"))
1808 if (strchr(legend,'D') != NULL && strchr(legend,'H') != NULL)
1821 printf("%s\n", legend);
1822 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1824 if (sscanf(ptr,"%lf",&lambda) != 1)
1826 gmx_fatal(FARGS,"There is no proper lambda legend in file '%s', can not deduce lambda",fn);
1832 static gmx_bool subtitle2lambda(const char *subtitle,double *lambda)
1839 /* plain text lambda string */
1840 ptr = strstr(subtitle,"lambda");
1843 /* xmgrace formatted lambda string */
1844 ptr = strstr(subtitle,"\\xl\\f{}");
1848 /* xmgr formatted lambda string */
1849 ptr = strstr(subtitle,"\\8l\\4");
1853 ptr = strstr(ptr,"=");
1857 bFound = (sscanf(ptr+1,"%lf",lambda) == 1);
1863 static double filename2lambda(char *fn)
1866 char *ptr,*endptr,*digitptr;
1869 /* go to the end of the path string and search backward to find the last
1870 directory in the path which has to contain the value of lambda
1872 while (ptr[1] != '\0')
1876 /* searching backward to find the second directory separator */
1881 if (ptr[0] != DIR_SEPARATOR && ptr[1] == DIR_SEPARATOR)
1883 if (dirsep == 1) break;
1886 /* save the last position of a digit between the last two
1887 separators = in the last dirname */
1888 if (dirsep > 0 && isdigit(*ptr))
1896 gmx_fatal(FARGS,"While trying to read the lambda value from the file path:"
1897 " last directory in the path '%s' does not contain a number",fn);
1899 if (digitptr[-1] == '-')
1903 lambda = strtod(digitptr,&endptr);
1904 if (endptr == digitptr)
1906 gmx_fatal(FARGS,"Malformed number in file path '%s'",fn);
1912 static void read_bar_xvg_lowlevel(char *fn, real *temp, xvg_t *ba)
1915 char *subtitle,**legend,*ptr;
1917 gmx_bool native_lambda_read=FALSE;
1923 np = read_xvg_legend(fn,&ba->y,&ba->nset,&subtitle,&legend);
1926 gmx_fatal(FARGS,"File %s contains no usable data.",fn);
1930 snew(ba->np,ba->nset);
1931 for(i=0;i<ba->nset;i++)
1936 if (subtitle != NULL)
1938 ptr = strstr(subtitle,"T =");
1942 if (sscanf(ptr,"%lf",&ba->temp) == 1)
1946 gmx_fatal(FARGS,"Found temperature of %g in file '%s'",
1956 gmx_fatal(FARGS,"Did not find a temperature in the subtitle in file '%s', use the -temp option of [TT]g_bar[tt]",fn);
1961 /* Try to deduce lambda from the subtitle */
1964 if (subtitle2lambda(subtitle,&(ba->native_lambda)))
1966 native_lambda_read=TRUE;
1969 snew(ba->lambda,ba->nset-1);
1972 /* Check if we have a single set, no legend, nset=2 means t and dH/dl */
1975 if (!native_lambda_read)
1977 /* Deduce lambda from the file name */
1978 ba->native_lambda = filename2lambda(fn);
1979 native_lambda_read=TRUE;
1981 ba->lambda[0] = ba->native_lambda;
1985 gmx_fatal(FARGS,"File %s contains multiple sets but no legends, can not determine the lambda values",fn);
1990 for(i=0; i<ba->nset-1; i++)
1992 gmx_bool is_dhdl=(i==0);
1993 /* Read lambda from the legend */
1994 ba->lambda[i] = legend2lambda(fn,legend[i], &is_dhdl);
1996 if (is_dhdl && !native_lambda_read)
1998 ba->native_lambda = ba->lambda[i];
1999 native_lambda_read=TRUE;
2004 if (!native_lambda_read)
2006 gmx_fatal(FARGS,"File %s contains multiple sets but no indication of the native lambda",fn);
2009 /* Reorder the data */
2010 for(i=1; i<ba->nset; i++)
2012 ba->y[i-1] = ba->y[i];
2016 for(i=0; i<ba->nset-1; i++)
2025 static void read_bar_xvg(char *fn, real *temp, lambda_t *lambda_head)
2034 read_bar_xvg_lowlevel(fn, temp, barsim);
2036 if (barsim->nset <1 )
2038 gmx_fatal(FARGS,"File '%s' contains fewer than two columns", fn);
2041 if ( !gmx_within_tol(*temp,barsim->temp,GMX_FLOAT_EPS) && (*temp > 0) )
2043 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2047 /* now create a series of samples_t */
2048 snew(s, barsim->nset);
2049 for(i=0;i<barsim->nset;i++)
2051 samples_init(s+i, barsim->native_lambda, barsim->lambda[i],
2052 barsim->temp, lambda_same(barsim->native_lambda,
2055 s[i].du=barsim->y[i];
2056 s[i].ndu=barsim->np[i];
2059 lambda_list_insert_sample(lambda_head, s+i);
2061 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2062 fn, s[0].t[0], s[0].t[s[0].ndu-1], s[0].native_lambda);
2063 for(i=0;i<barsim->nset;i++)
2065 printf(" %.3f (%d pts)", s[i].foreign_lambda, s[i].ndu);
2070 static void read_edr_rawdh_block(samples_t **smp, int *ndu, t_enxblock *blk,
2071 double start_time, double delta_time,
2072 double native_lambda, double temp,
2073 double *last_t, const char *filename)
2077 double foreign_lambda;
2079 samples_t *s; /* convenience pointer */
2082 /* check the block types etc. */
2083 if ( (blk->nsub < 3) ||
2084 (blk->sub[0].type != xdr_datatype_int) ||
2085 (blk->sub[1].type != xdr_datatype_double) ||
2087 (blk->sub[2].type != xdr_datatype_float) &&
2088 (blk->sub[2].type != xdr_datatype_double)
2090 (blk->sub[0].nr < 1) ||
2091 (blk->sub[1].nr < 1) )
2094 "Unexpected/corrupted block data in file %s around time %g.",
2095 filename, start_time);
2098 derivative = blk->sub[0].ival[0];
2099 foreign_lambda = blk->sub[1].dval[0];
2103 /* initialize the samples structure if it's empty. */
2105 samples_init(*smp, native_lambda, foreign_lambda, temp,
2106 derivative!=0, filename);
2107 (*smp)->start_time=start_time;
2108 (*smp)->delta_time=delta_time;
2111 /* set convenience pointer */
2114 /* now double check */
2115 if ( ! lambda_same(s->foreign_lambda, foreign_lambda) ||
2116 ( (derivative!=0) != (s->derivative!=0) ) )
2118 fprintf(stderr, "Got foreign lambda=%g, expected: %g\n",
2119 foreign_lambda, s->foreign_lambda);
2120 fprintf(stderr, "Got derivative=%d, expected: %d\n",
2121 derivative, s->derivative);
2122 gmx_fatal(FARGS, "Corrupted data in file %s around t=%g.",
2123 filename, start_time);
2126 /* make room for the data */
2127 if (s->ndu_alloc < (size_t)(s->ndu + blk->sub[2].nr) )
2129 s->ndu_alloc += (s->ndu_alloc < (size_t)blk->sub[2].nr) ?
2130 blk->sub[2].nr*2 : s->ndu_alloc;
2131 srenew(s->du_alloc, s->ndu_alloc);
2135 s->ndu += blk->sub[2].nr;
2136 s->ntot += blk->sub[2].nr;
2137 *ndu = blk->sub[2].nr;
2139 /* and copy the data*/
2140 for(j=0;j<blk->sub[2].nr;j++)
2142 if (blk->sub[2].type == xdr_datatype_float)
2144 s->du[startj+j] = blk->sub[2].fval[j];
2148 s->du[startj+j] = blk->sub[2].dval[j];
2151 if (start_time + blk->sub[2].nr*delta_time > *last_t)
2153 *last_t = start_time + blk->sub[2].nr*delta_time;
2157 static samples_t *read_edr_hist_block(int *nsamples, t_enxblock *blk,
2158 double start_time, double delta_time,
2159 double native_lambda, double temp,
2160 double *last_t, const char *filename)
2165 double foreign_lambda;
2169 /* check the block types etc. */
2170 if ( (blk->nsub < 2) ||
2171 (blk->sub[0].type != xdr_datatype_double) ||
2172 (blk->sub[1].type != xdr_datatype_large_int) ||
2173 (blk->sub[0].nr < 2) ||
2174 (blk->sub[1].nr < 2) )
2177 "Unexpected/corrupted block data in file %s around time %g",
2178 filename, start_time);
2189 "Unexpected/corrupted block data in file %s around time %g",
2190 filename, start_time);
2196 foreign_lambda=blk->sub[0].dval[0];
2197 derivative=(int)(blk->sub[1].lval[1]);
2199 foreign_lambda=native_lambda;
2201 samples_init(s, native_lambda, foreign_lambda, temp,
2202 derivative!=0, filename);
2205 for(i=0;i<nhist;i++)
2207 nbins[i] = blk->sub[i+2].nr;
2210 hist_init(s->hist, nhist, nbins);
2212 for(i=0;i<nhist;i++)
2214 s->hist->x0[i]=blk->sub[1].lval[2+i];
2215 s->hist->dx[i] = blk->sub[0].dval[1];
2217 s->hist->dx[i] = - s->hist->dx[i];
2220 s->hist->start_time = start_time;
2221 s->hist->delta_time = delta_time;
2222 s->start_time = start_time;
2223 s->delta_time = delta_time;
2225 for(i=0;i<nhist;i++)
2228 gmx_large_int_t sum=0;
2230 for(j=0;j<s->hist->nbin[i];j++)
2232 int binv=(int)(blk->sub[i+2].ival[j]);
2234 s->hist->bin[i][j] = binv;
2247 gmx_fatal(FARGS, "Histogram counts don't match in %s",
2253 if (start_time + s->hist->sum*delta_time > *last_t)
2255 *last_t = start_time + s->hist->sum*delta_time;
2261 static void read_barsim_edr(char *fn, real *temp, lambda_t *lambda_head)
2267 gmx_enxnm_t *enm=NULL;
2270 samples_t **samples_rawdh=NULL; /* contains samples for raw delta_h */
2271 int *nhists=NULL; /* array to keep count & print at end */
2272 int *npts=NULL; /* array to keep count & print at end */
2273 double *lambdas=NULL; /* array to keep count & print at end */
2274 double native_lambda=-1;
2275 double end_time; /* the end time of the last batch of samples */
2278 fp = open_enx(fn,"r");
2279 do_enxnms(fp,&nre,&enm);
2282 while(do_enx(fp, fr))
2284 /* count the data blocks */
2289 /* DHCOLL block information: */
2290 double start_time=0, delta_time=0, start_lambda=0, delta_lambda=0;
2293 /* count the blocks and handle collection information: */
2294 for(i=0;i<fr->nblock;i++)
2296 if (fr->block[i].id == enxDHHIST )
2298 if ( fr->block[i].id == enxDH )
2300 if (fr->block[i].id == enxDHCOLL )
2303 if ( (fr->block[i].nsub < 1) ||
2304 (fr->block[i].sub[0].type != xdr_datatype_double) ||
2305 (fr->block[i].sub[0].nr < 5))
2307 gmx_fatal(FARGS, "Unexpected block data in file %s", fn);
2310 /* read the data from the DHCOLL block */
2311 rtemp = fr->block[i].sub[0].dval[0];
2312 start_time = fr->block[i].sub[0].dval[1];
2313 delta_time = fr->block[i].sub[0].dval[2];
2314 start_lambda = fr->block[i].sub[0].dval[3];
2315 delta_lambda = fr->block[i].sub[0].dval[4];
2319 gmx_fatal(FARGS, "Lambda values not constant in %s: can't apply BAR method", fn);
2321 if ( ( *temp != rtemp) && (*temp > 0) )
2323 gmx_fatal(FARGS,"Temperature in file %s different from earlier files or setting\n", fn);
2334 gmx_fatal(FARGS, "Did not find a delta h information in file %s" , fn);
2336 if (nblocks_raw > 0 && nblocks_hist > 0 )
2338 gmx_fatal(FARGS, "Can't handle both raw delta U data and histograms in the same file %s", fn);
2343 /* check the native lambda */
2344 if (!lambda_same(start_lambda, native_lambda) )
2346 gmx_fatal(FARGS, "Native lambda not constant in file %s: started at %g, and becomes %g at time %g",
2347 fn, native_lambda, start_lambda, start_time);
2349 /* check the number of samples against the previous number */
2350 if ( ((nblocks_raw+nblocks_hist)!=nsamples) || (nlam!=1 ) )
2352 gmx_fatal(FARGS, "Unexpected block count in %s: was %d, now %d\n",
2353 fn, nsamples+1, nblocks_raw+nblocks_hist+nlam);
2355 /* check whether last iterations's end time matches with
2356 the currrent start time */
2357 if ( (fabs(last_t - start_time) > 2*delta_time) && last_t>=0)
2359 /* it didn't. We need to store our samples and reallocate */
2360 for(i=0;i<nsamples;i++)
2362 if (samples_rawdh[i])
2364 /* insert it into the existing list */
2365 lambda_list_insert_sample(lambda_head,
2367 /* and make sure we'll allocate a new one this time
2369 samples_rawdh[i]=NULL;
2376 /* this is the first round; allocate the associated data
2378 native_lambda=start_lambda;
2379 nsamples=nblocks_raw+nblocks_hist;
2380 snew(nhists, nsamples);
2381 snew(npts, nsamples);
2382 snew(lambdas, nsamples);
2383 snew(samples_rawdh, nsamples);
2384 for(i=0;i<nsamples;i++)
2389 samples_rawdh[i]=NULL; /* init to NULL so we know which
2390 ones contain values */
2395 k=0; /* counter for the lambdas, etc. arrays */
2396 for(i=0;i<fr->nblock;i++)
2398 if (fr->block[i].id == enxDH)
2401 read_edr_rawdh_block(&(samples_rawdh[k]),
2404 start_time, delta_time,
2405 start_lambda, rtemp,
2408 if (samples_rawdh[k])
2410 lambdas[k]=samples_rawdh[k]->foreign_lambda;
2414 else if (fr->block[i].id == enxDHHIST)
2418 samples_t *s; /* this is where the data will go */
2419 s=read_edr_hist_block(&nb, &(fr->block[i]),
2420 start_time, delta_time,
2421 start_lambda, rtemp,
2426 lambdas[k]= s->foreign_lambda;
2429 /* and insert the new sample immediately */
2432 lambda_list_insert_sample(lambda_head, s+j);
2437 /* Now store all our extant sample collections */
2438 for(i=0;i<nsamples;i++)
2440 if (samples_rawdh[i])
2442 /* insert it into the existing list */
2443 lambda_list_insert_sample(lambda_head, samples_rawdh[i]);
2448 fprintf(stderr, "\n");
2449 printf("%s: %.1f - %.1f; lambda = %.3f\n foreign lambdas:",
2450 fn, first_t, last_t, native_lambda);
2451 for(i=0;i<nsamples;i++)
2455 printf(" %.3f (%d hists)", lambdas[i], nhists[i]);
2459 printf(" %.3f (%d pts)", lambdas[i], npts[i]);
2469 int gmx_bar(int argc,char *argv[])
2471 static const char *desc[] = {
2472 "[TT]g_bar[tt] calculates free energy difference estimates through ",
2473 "Bennett's acceptance ratio method (BAR). It also automatically",
2474 "adds series of individual free energies obtained with BAR into",
2475 "a combined free energy estimate.[PAR]",
2477 "Every individual BAR free energy difference relies on two ",
2478 "simulations at different states: say state A and state B, as",
2479 "controlled by a parameter, [GRK]lambda[grk] (see the [TT].mdp[tt] parameter",
2480 "[TT]init_lambda[tt]). The BAR method calculates a ratio of weighted",
2481 "average of the Hamiltonian difference of state B given state A and",
2483 "The energy differences to the other state must be calculated",
2484 "explicitly during the simulation. This can be done with",
2485 "the [TT].mdp[tt] option [TT]foreign_lambda[tt].[PAR]",
2487 "Input option [TT]-f[tt] expects multiple [TT]dhdl.xvg[tt] files. ",
2488 "Two types of input files are supported:[BR]",
2489 "[TT]*[tt] Files with more than one [IT]y[it]-value. ",
2490 "The files should have columns ",
2491 "with dH/d[GRK]lambda[grk] and [GRK]Delta[grk][GRK]lambda[grk]. ",
2492 "The [GRK]lambda[grk] values are inferred ",
2493 "from the legends: [GRK]lambda[grk] of the simulation from the legend of ",
2494 "dH/d[GRK]lambda[grk] and the foreign [GRK]lambda[grk] values from the ",
2495 "legends of Delta H",
2497 "[TT]*[tt] Files with only one [IT]y[it]-value. Using the",
2498 "[TT]-extp[tt] option for these files, it is assumed",
2499 "that the [IT]y[it]-value is dH/d[GRK]lambda[grk] and that the ",
2500 "Hamiltonian depends linearly on [GRK]lambda[grk]. ",
2501 "The [GRK]lambda[grk] value of the simulation is inferred from the ",
2502 "subtitle (if present), otherwise from a number in the subdirectory ",
2503 "in the file name.[PAR]",
2505 "The [GRK]lambda[grk] of the simulation is parsed from ",
2506 "[TT]dhdl.xvg[tt] file's legend containing the string 'dH', the ",
2507 "foreign [GRK]lambda[grk] values from the legend containing the ",
2508 "capitalized letters 'D' and 'H'. The temperature is parsed from ",
2509 "the legend line containing 'T ='.[PAR]",
2511 "The input option [TT]-g[tt] expects multiple [TT].edr[tt] files. ",
2512 "These can contain either lists of energy differences (see the ",
2513 "[TT].mdp[tt] option [TT]separate_dhdl_file[tt]), or a series of ",
2514 "histograms (see the [TT].mdp[tt] options [TT]dh_hist_size[tt] and ",
2515 "[TT]dh_hist_spacing[tt]).", "The temperature and [GRK]lambda[grk] ",
2516 "values are automatically deduced from the [TT]ener.edr[tt] file.[PAR]",
2518 "In addition to the [TT].mdp[tt] option [TT]foreign_lambda[tt], ",
2519 "the energy difference can also be extrapolated from the ",
2520 "dH/d[GRK]lambda[grk] values. This is done with the[TT]-extp[tt]",
2521 "option, which assumes that the system's Hamiltonian depends linearly",
2522 "on [GRK]lambda[grk], which is not normally the case.[PAR]",
2524 "The free energy estimates are determined using BAR with bisection, ",
2525 "with the precision of the output set with [TT]-prec[tt]. ",
2526 "An error estimate taking into account time correlations ",
2527 "is made by splitting the data into blocks and determining ",
2528 "the free energy differences over those blocks and assuming ",
2529 "the blocks are independent. ",
2530 "The final error estimate is determined from the average variance ",
2531 "over 5 blocks. A range of block numbers for error estimation can ",
2532 "be provided with the options [TT]-nbmin[tt] and [TT]-nbmax[tt].[PAR]",
2534 "[TT]g_bar[tt] tries to aggregate samples with the same 'native' and ",
2535 "'foreign' [GRK]lambda[grk] values, but always assumes independent ",
2536 "samples. [BB]Note[bb] that when aggregating energy ",
2537 "differences/derivatives with different sampling intervals, this is ",
2538 "almost certainly not correct. Usually subsequent energies are ",
2539 "correlated and different time intervals mean different degrees ",
2540 "of correlation between samples.[PAR]",
2542 "The results are split in two parts: the last part contains the final ",
2543 "results in kJ/mol, together with the error estimate for each part ",
2544 "and the total. The first part contains detailed free energy ",
2545 "difference estimates and phase space overlap measures in units of ",
2546 "kT (together with their computed error estimate). The printed ",
2548 "[TT]*[tt] lam_A: the [GRK]lambda[grk] values for point A.[BR]",
2549 "[TT]*[tt] lam_B: the [GRK]lambda[grk] values for point B.[BR]",
2550 "[TT]*[tt] DG: the free energy estimate.[BR]",
2551 "[TT]*[tt] s_A: an estimate of the relative entropy of B in A.[BR]",
2552 "[TT]*[tt] s_A: an estimate of the relative entropy of A in B.[BR]",
2553 "[TT]*[tt] stdev: an estimate expected per-sample standard deviation.[PAR]",
2555 "The relative entropy of both states in each other's ensemble can be ",
2556 "interpreted as a measure of phase space overlap: ",
2557 "the relative entropy s_A of the work samples of lambda_B in the ",
2558 "ensemble of lambda_A (and vice versa for s_B), is a ",
2559 "measure of the 'distance' between Boltzmann distributions of ",
2560 "the two states, that goes to zero for identical distributions. See ",
2561 "Wu & Kofke, J. Chem. Phys. 123 084109 (2005) for more information.",
2563 "The estimate of the expected per-sample standard deviation, as given ",
2564 "in Bennett's original BAR paper: Bennett, J. Comp. Phys. 22, p 245 (1976).",
2565 "Eq. 10 therein gives an estimate of the quality of sampling (not directly",
2566 "of the actual statistical error, because it assumes independent samples).[PAR]",
2568 "To get a visual estimate of the phase space overlap, use the ",
2569 "[TT]-oh[tt] option to write series of histograms, together with the ",
2570 "[TT]-nbin[tt] option.[PAR]"
2572 static real begin=0,end=-1,temp=-1;
2573 int nd=2,nbmin=5,nbmax=5;
2575 gmx_bool use_dhdl=FALSE;
2576 gmx_bool calc_s,calc_v;
2578 { "-b", FALSE, etREAL, {&begin}, "Begin time for BAR" },
2579 { "-e", FALSE, etREAL, {&end}, "End time for BAR" },
2580 { "-temp", FALSE, etREAL, {&temp}, "Temperature (K)" },
2581 { "-prec", FALSE, etINT, {&nd}, "The number of digits after the decimal point" },
2582 { "-nbmin", FALSE, etINT, {&nbmin}, "Minimum number of blocks for error estimation" },
2583 { "-nbmax", FALSE, etINT, {&nbmax}, "Maximum number of blocks for error estimation" },
2584 { "-nbin", FALSE, etINT, {&nbin}, "Number of bins for histogram output"},
2585 { "-extp", FALSE, etBOOL, {&use_dhdl}, "Whether to linearly extrapolate dH/dl values to use as energies"}
2589 { efXVG, "-f", "dhdl", ffOPTRDMULT },
2590 { efEDR, "-g", "ener", ffOPTRDMULT },
2591 { efXVG, "-o", "bar", ffOPTWR },
2592 { efXVG, "-oi", "barint", ffOPTWR },
2593 { efXVG, "-oh", "histogram", ffOPTWR }
2595 #define NFILE asize(fnm)
2598 int nf=0; /* file counter */
2600 int nfile_tot; /* total number of input files */
2605 lambda_t *lb; /* the pre-processed lambda data (linked list head) */
2606 lambda_t lambda_head; /* the head element */
2607 barres_t *results; /* the results */
2608 int nresults; /* number of results in results array */
2611 double prec,dg_tot,dg,sig, dg_tot_max, dg_tot_min;
2614 char dgformat[20],xvg2format[STRLEN],xvg3format[STRLEN],buf[STRLEN];
2615 char ktformat[STRLEN], sktformat[STRLEN];
2616 char kteformat[STRLEN], skteformat[STRLEN];
2619 gmx_bool result_OK=TRUE,bEE=TRUE;
2621 gmx_bool disc_err=FALSE;
2622 double sum_disc_err=0.; /* discretization error */
2623 gmx_bool histrange_err=FALSE;
2624 double sum_histrange_err=0.; /* histogram range error */
2625 double stat_err=0.; /* statistical error */
2627 CopyRight(stderr,argv[0]);
2628 parse_common_args(&argc,argv,
2630 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,&oenv);
2632 if (opt2bSet("-f", NFILE, fnm))
2634 nxvgfile = opt2fns(&fxvgnms,"-f",NFILE,fnm);
2636 if (opt2bSet("-g", NFILE, fnm))
2638 nedrfile = opt2fns(&fedrnms,"-g",NFILE,fnm);
2641 /* make linked list */
2643 lambda_init(lb, 0, 0);
2648 nfile_tot = nxvgfile + nedrfile;
2652 gmx_fatal(FARGS,"No input files!");
2657 gmx_fatal(FARGS,"Can not have negative number of digits");
2661 snew(partsum,(nbmax+1)*(nbmax+1));
2664 /* read in all files. First xvg files */
2665 for(f=0; f<nxvgfile; f++)
2667 read_bar_xvg(fxvgnms[f],&temp,lb);
2670 /* then .edr files */
2671 for(f=0; f<nedrfile; f++)
2673 read_barsim_edr(fedrnms[f],&temp,lb);;
2677 /* fix the times to allow for equilibration */
2678 lambdas_impose_times(lb, begin, end);
2680 if (opt2bSet("-oh",NFILE,fnm))
2682 lambdas_histogram(lb, opt2fn("-oh",NFILE,fnm), nbin, oenv);
2685 /* assemble the output structures from the lambdas */
2686 results=barres_list_create(lb, &nresults, use_dhdl);
2688 sum_disc_err=barres_list_max_disc_err(results, nresults);
2692 printf("\nNo results to calculate.\n");
2696 if (sum_disc_err > prec)
2699 nd = ceil(-log10(prec));
2700 printf("WARNING: setting the precision to %g because that is the minimum\n reasonable number, given the expected discretization error.\n", prec);
2704 sprintf(lamformat,"%%6.3f");
2705 sprintf( dgformat,"%%%d.%df",3+nd,nd);
2706 /* the format strings of the results in kT */
2707 sprintf( ktformat,"%%%d.%df",5+nd,nd);
2708 sprintf( sktformat,"%%%ds",6+nd);
2709 /* the format strings of the errors in kT */
2710 sprintf( kteformat,"%%%d.%df",3+nd,nd);
2711 sprintf( skteformat,"%%%ds",4+nd);
2712 sprintf(xvg2format,"%s %s\n","%g",dgformat);
2713 sprintf(xvg3format,"%s %s %s\n","%g",dgformat,dgformat);
2718 if (opt2bSet("-o",NFILE,fnm))
2720 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2721 fpb = xvgropen_type(opt2fn("-o",NFILE,fnm),"Free energy differences",
2722 "\\lambda",buf,exvggtXYDY,oenv);
2726 if (opt2bSet("-oi",NFILE,fnm))
2728 sprintf(buf,"%s (%s)","\\DeltaG","kT");
2729 fpi = xvgropen(opt2fn("-oi",NFILE,fnm),"Free energy integral",
2730 "\\lambda",buf,oenv);
2738 /* first calculate results */
2741 for(f=0; f<nresults; f++)
2743 /* Determine the free energy difference with a factor of 10
2744 * more accuracy than requested for printing.
2746 calc_bar(&(results[f]), 0.1*prec, nbmin, nbmax,
2749 if (results[f].dg_disc_err > prec/10.)
2751 if (results[f].dg_histrange_err > prec/10.)
2755 /* print results in kT */
2759 printf("\nTemperature: %g K\n", temp);
2761 printf("\nDetailed results in kT (see help for explanation):\n\n");
2762 printf("%6s ", " lam_A");
2763 printf("%6s ", " lam_B");
2764 printf(sktformat, "DG ");
2766 printf(skteformat, "+/- ");
2768 printf(skteformat, "disc ");
2770 printf(skteformat, "range ");
2771 printf(sktformat, "s_A ");
2773 printf(skteformat, "+/- " );
2774 printf(sktformat, "s_B ");
2776 printf(skteformat, "+/- " );
2777 printf(sktformat, "stdev ");
2779 printf(skteformat, "+/- ");
2781 for(f=0; f<nresults; f++)
2783 printf(lamformat, results[f].a->native_lambda);
2785 printf(lamformat, results[f].b->native_lambda);
2787 printf(ktformat, results[f].dg);
2791 printf(kteformat, results[f].dg_err);
2796 printf(kteformat, results[f].dg_disc_err);
2801 printf(kteformat, results[f].dg_histrange_err);
2804 printf(ktformat, results[f].sa);
2808 printf(kteformat, results[f].sa_err);
2811 printf(ktformat, results[f].sb);
2815 printf(kteformat, results[f].sb_err);
2818 printf(ktformat, results[f].dg_stddev);
2822 printf(kteformat, results[f].dg_stddev_err);
2826 /* Check for negative relative entropy with a 95% certainty. */
2827 if (results[f].sa < -2*results[f].sa_err ||
2828 results[f].sb < -2*results[f].sb_err)
2836 printf("\nWARNING: Some of these results violate the Second Law of "
2837 "Thermodynamics: \n"
2838 " This is can be the result of severe undersampling, or "
2840 " there is something wrong with the simulations.\n");
2844 /* final results in kJ/mol */
2845 printf("\n\nFinal results in kJ/mol:\n\n");
2847 for(f=0; f<nresults; f++)
2852 fprintf(fpi, xvg2format, results[f].a->native_lambda, dg_tot);
2858 fprintf(fpb, xvg3format,
2859 0.5*(results[f].a->native_lambda +
2860 results[f].b->native_lambda),
2861 results[f].dg,results[f].dg_err);
2864 /*printf("lambda %4.2f - %4.2f, DG ", results[f].lambda_a,
2865 results[f].lambda_b);*/
2867 printf(lamformat, results[f].a->native_lambda);
2869 printf(lamformat, results[f].b->native_lambda);
2872 printf(dgformat,results[f].dg*kT);
2876 printf(dgformat,results[f].dg_err*kT);
2880 printf(" (max. range err. = ");
2881 printf(dgformat,results[f].dg_histrange_err*kT);
2883 sum_histrange_err += results[f].dg_histrange_err*kT;
2887 dg_tot += results[f].dg;
2891 printf(lamformat, results[0].a->native_lambda);
2893 printf(lamformat, results[nresults-1].b->native_lambda);
2896 printf(dgformat,dg_tot*kT);
2899 stat_err=bar_err(nbmin,nbmax,partsum)*kT;
2901 printf(dgformat, max(max(stat_err, sum_disc_err), sum_histrange_err));
2906 printf("\nmaximum discretization error = ");
2907 printf(dgformat,sum_disc_err);
2908 if (bEE && stat_err < sum_disc_err)
2910 printf("WARNING: discretization error (%g) is larger than statistical error.\n Decrease histogram spacing for more accurate results\n", stat_err);
2915 printf("\nmaximum histogram range error = ");
2916 printf(dgformat,sum_histrange_err);
2917 if (bEE && stat_err < sum_histrange_err)
2919 printf("WARNING: histogram range error (%g) is larger than statistical error.\n Increase histogram range for more accurate results\n", stat_err);
2928 fprintf(fpi, xvg2format,
2929 results[nresults-1].b->native_lambda, dg_tot);
2933 do_view(oenv,opt2fn_null("-o",NFILE,fnm),"-xydy");
2934 do_view(oenv,opt2fn_null("-oi",NFILE,fnm),"-xydy");