3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Green Red Orange Magenta Azure Cyan Skyblue
42 #include "gmx_fatal.h"
46 #include "gmx_statistics.h"
52 static const char *etitles[] = { "E-docked", "Free Energy" };
63 static t_pdbfile *read_pdbf(const char *fn)
72 get_stx_coordnum (fn,&natoms);
73 init_t_atoms(&(pdbf->atoms),natoms,FALSE);
75 read_stx_conf(fn,buf,&pdbf->atoms,pdbf->x,NULL,&pdbf->ePBC,pdbf->box);
78 ptr = fgets2(buf,255,fp);
80 if (strstr(buf,"Intermolecular") != NULL) {
81 ptr = strchr(buf,'=');
82 sscanf(ptr+1,"%lf",&e);
85 else if (strstr(buf,"Estimated Free") != NULL) {
86 ptr = strchr(buf,'=');
87 sscanf(ptr+1,"%lf",&e);
91 } while (ptr != NULL);
97 static t_pdbfile **read_em_all(const char *fn,int *npdbf)
101 char buf[256],name[256];
105 buf[strlen(buf)-4] = '\0';
110 sprintf(name,"%s_%d.pdb",buf,i+1);
111 if ((bExist = gmx_fexist(name)) == TRUE) {
112 pdbf[i] = read_pdbf(name);
113 pdbf[i]->index = i+1;
117 srenew(pdbf,maxpdbf);
124 printf("Found %d pdb files\n",i);
129 static gmx_bool bFreeSort=FALSE;
131 static int pdbf_comp(const void *a,const void *b)
137 pa = *(t_pdbfile **)a;
138 pb = *(t_pdbfile **)b;
140 dc = pa->cluster_id - pb->cluster_id;
144 x = pa->efree - pb->efree;
146 x = pa->edocked - pb->edocked;
159 static void analyse_em_all(int npdb,t_pdbfile *pdbf[], const char *edocked,
160 const char *efree, const output_env_t oenv)
165 for(bFreeSort = FALSE; (bFreeSort <= TRUE); bFreeSort++) {
166 qsort(pdbf,npdb,sizeof(pdbf[0]),pdbf_comp);
167 fp = xvgropen(bFreeSort ? efree : edocked,
168 etitles[bFreeSort],"()","E (kJ/mol)",oenv);
169 for(i=0; (i<npdb); i++)
170 fprintf(fp,"%12lf\n",bFreeSort ? pdbf[i]->efree : pdbf[i]->edocked);
175 static void clust_stat(FILE *fp,int start,int end,t_pdbfile *pdbf[])
181 ed = gmx_stats_init();
182 ef = gmx_stats_init();
183 for(i=start; (i<end); i++) {
184 gmx_stats_add_point(ed,i-start,pdbf[i]->edocked,0,0);
185 gmx_stats_add_point(ef,i-start,pdbf[i]->efree,0,0);
187 gmx_stats_get_ase(ed,&aver,&sigma,NULL);
188 fprintf(fp," <%12s> = %8.3f (+/- %6.3f)\n",etitles[FALSE],aver,sigma);
189 gmx_stats_get_ase(ef,&aver,&sigma,NULL);
190 fprintf(fp," <%12s> = %8.3f (+/- %6.3f)\n",etitles[TRUE],aver,sigma);
197 static real rmsd_dist(t_pdbfile *pa,t_pdbfile *pb,gmx_bool bRMSD)
205 for(i=0; (i<pa->atoms.nr); i++) {
206 rvec_sub(pa->x[i],pb->x[i],dx);
207 rmsd += iprod(dx,dx);
209 rmsd = sqrt(rmsd/pa->atoms.nr);
215 for(i=0; (i<pa->atoms.nr); i++) {
216 rvec_inc(acm,pa->x[i]);
217 rvec_inc(bcm,pb->x[i]);
219 rvec_sub(acm,bcm,dx);
220 for(i=0; (i<DIM); i++)
221 dx[i] /= pa->atoms.nr;
227 static void line(FILE *fp)
229 fprintf(fp," - - - - - - - - - -\n");
232 static void cluster_em_all(FILE *fp,int npdb,t_pdbfile *pdbf[],
233 const char *pdbout,gmx_bool bFree,gmx_bool bRMSD,real cutoff)
240 qsort(pdbf,npdb,sizeof(pdbf[0]),pdbf_comp);
242 fprintf(fp,"Statistics over all structures:\n");
243 clust_stat(fp,0,npdb,pdbf);
246 /* Index to first structure in a cluster */
250 for(i=1; (i<npdb); i++) {
251 for(j=0; (j<ncluster); j++) {
252 rmsd = rmsd_dist(pdbf[cndx[j]],pdbf[i],bRMSD);
253 if (rmsd <= cutoff) {
254 /* Structure i is in cluster j */
255 pdbf[i]->cluster_id = pdbf[cndx[j]]->cluster_id;
260 /* New cluster! Cool! */
262 pdbf[i]->cluster_id = ncluster;
266 cndx[ncluster] = npdb;
267 qsort(pdbf,npdb,sizeof(pdbf[0]),pdbf_comp);
271 for(i=1; (i<npdb); i++) {
272 if (pdbf[i]->cluster_id != pdbf[i-1]->cluster_id) {
278 gmx_fatal(FARGS,"Consistency error: j = %d, ncluster = %d",j,ncluster);
280 fprintf(fp,"I found %d clusters based on %s and %s with a %.3f nm cut-off\n",
281 ncluster,etitles[bFree],bRMSD ? "RMSD" : "distance",cutoff);
283 for(j=0; (j<ncluster); j++) {
284 fprintf(fp,"Cluster: %3d %s: %10.5f kJ/mol %3d elements\n",
286 bFree ? pdbf[cndx[j]]->efree : pdbf[cndx[j]]->edocked,
288 clust_stat(fp,cndx[j],cndx[j+1],pdbf);
289 for(k=cndx[j]; (k<cndx[j+1]); k++)
290 fprintf(fp," %3d",pdbf[k]->index);
297 int gmx_anadock(int argc,char *argv[])
299 const char *desc[] = {
300 "[TT]g_anadock[tt] analyses the results of an Autodock run and clusters the",
301 "structures together, based on distance or RMSD. The docked energy",
302 "and free energy estimates are analysed, and for each cluster the",
303 "energy statistics are printed.[PAR]",
304 "An alternative approach to this is to cluster the structures first",
305 "using [TT]g_cluster[tt] and then sort the clusters on either lowest",
306 "energy or average energy."
309 { efPDB, "-f", NULL, ffREAD },
310 { efPDB, "-ox", "cluster", ffWRITE },
311 { efXVG, "-od", "edocked", ffWRITE },
312 { efXVG, "-of", "efree", ffWRITE },
313 { efLOG, "-g", "anadock", ffWRITE }
316 #define NFILE asize(fnm)
317 static gmx_bool bFree=FALSE,bRMS=TRUE;
318 static real cutoff = 0.2;
320 { "-free", FALSE, etBOOL, {&bFree},
321 "Use Free energy estimate from autodock for sorting the classes" },
322 { "-rms", FALSE, etBOOL, {&bRMS},
323 "Cluster on RMS or distance" },
324 { "-cutoff", FALSE, etREAL, {&cutoff},
325 "Maximum RMSD/distance for belonging to the same cluster" }
327 #define NPA asize(pa)
330 t_pdbfile **pdbf=NULL;
333 CopyRight(stderr,argv[0]);
334 parse_common_args(&argc,argv,0,NFILE,fnm,NPA,pa, asize(desc),desc,0,
337 fp = ffopen(opt2fn("-g",NFILE,fnm),"w");
338 please_cite(stdout,"Hetenyi2002b");
339 please_cite(fp,"Hetenyi2002b");
341 pdbf = read_em_all(opt2fn("-f",NFILE,fnm),&npdbf);
343 analyse_em_all(npdbf,pdbf,opt2fn("-od",NFILE,fnm),opt2fn("-of",NFILE,fnm),
346 cluster_em_all(fp,npdbf,pdbf,opt2fn("-ox",NFILE,fnm),