src/gromacs/gmxlib/index.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROningen Mixture of Alchemy and Childrens' Stories
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <ctype.h>
  40 #include <string.h>
  41 #include <assert.h>
  42 #include "sysstuff.h"
  43 #include "strdb.h"
  44 #include "futil.h"
  45 #include "macros.h"
  46 #include "names.h"
  47 #include "string2.h"
  48 #include "statutil.h"
  49 #include "confio.h"
  50 #include "main.h"
  51 #include "copyrite.h"
  52 #include "typedefs.h"
  53 #include "smalloc.h"
  54 #include "invblock.h"
  55 #include "macros.h"
  56 #include "index.h"
  57 #include "txtdump.h"
  58 #include "gmxfio.h"
  59
  60
  61
  62 const char gmx_residuetype_undefined[]="Other";
  63
  64 struct gmx_residuetype
  65 {
  66     int      n;
  67     char **  resname;
  68     char **  restype;
  69
  70 };
  71
  72
  73 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  74 {
  75   char c;
  76
  77   if (bASK) {
  78     do {
  79       c=toupper(fgetc(stdin));
  80     } while ((c != 'Y') && (c != 'N'));
  81
  82     return (c == 'Y');
  83   }
  84   else
  85     return FALSE;
  86 }
  87
  88 t_blocka *new_blocka(void)
  89 {
  90   t_blocka *block;
  91
  92   snew(block,1);
  93   snew(block->index,1);
  94
  95   return block;
  96 }
  97
  98 void write_index(const char *outf, t_blocka *b,char **gnames)
  99 {
 100   FILE *out;
 101   int  i,j,k;
 102
 103   out=gmx_fio_fopen(outf,"w");
 104   /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 105   for(i=0; (i<b->nr); i++) {
 106     fprintf(out,"[ %s ]\n",gnames[i]);
 107     for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
 108       fprintf(out,"%4d ",b->a[j]+1);
 109       if ((k % 15) == 14)
 110         fprintf(out,"\n");
 111     }
 112     fprintf(out,"\n");
 113   }
 114   gmx_fio_fclose(out);
 115 }
 116
 117 void add_grp(t_blocka *b,char ***gnames,int nra,atom_id a[],const char *name)
 118 {
 119   int i;
 120
 121   srenew(b->index,b->nr+2);
 122   srenew(*gnames,b->nr+1);
 123   (*gnames)[b->nr]=strdup(name);
 124
 125   srenew(b->a,b->nra+nra);
 126   for(i=0; (i<nra); i++)
 127     b->a[b->nra++]=a[i];
 128   b->nr++;
 129   b->index[b->nr]=b->nra;
 130 }
 131
 132 /* compare index in `a' with group in `b' at `index',
 133    when `index'<0 it is relative to end of `b' */
 134 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 135 {
 136   int i;
 137
 138   if (index < 0)
 139     index = b->nr-1+index;
 140   if (index >= b->nr)
 141     gmx_fatal(FARGS,"no such index group %d in t_blocka (nr=%d)",index,b->nr);
 142   /* compare sizes */
 143   if ( nra != b->index[index+1] - b->index[index] )
 144     return FALSE;
 145   for(i=0; i<nra; i++)
 146     if ( a[i] != b->a[b->index[index]+i] )
 147       return FALSE;
 148   return TRUE;
 149 }
 150
 151 static void
 152 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 153 {
 154     int i,j;
 155     int found;
 156
 157     int * counter;
 158
 159     snew(counter,ntypes);
 160     for(i=0;i<ntypes;i++)
 161     {
 162         counter[i]=0;
 163     }
 164     for(i=0;i<nres;i++)
 165     {
 166         found=0;
 167         for(j=0;j<ntypes;j++)
 168         {
 169             if(!gmx_strcasecmp(restype[i],typenames[j]))
 170             {
 171                 counter[j]++;
 172             }
 173         }
 174     }
 175
 176     for(i=0; (i<ntypes); i++)
 177     {
 178         if (counter[i] > 0)
 179         {
 180             printf("There are: %5d %10s residues\n",counter[i],typenames[i]);
 181         }
 182     }
 183
 184     sfree(counter);
 185 }
 186
 187
 188 atom_id *
 189 mk_aid(t_atoms *atoms,const char ** restype,const char * typestring,int *nra,gmx_bool bMatch)
 190 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 191 {
 192     atom_id *a;
 193     int     i;
 194     int     res;
 195
 196     snew(a,atoms->nr);
 197     *nra=0;
 198     for(i=0; (i<atoms->nr); i++)
 199     {
 200         res=!gmx_strcasecmp(restype[atoms->atom[i].resind],typestring);
 201         if(bMatch==FALSE)
 202         {
 203             res=!res;
 204         }
 205         if(res)
 206         {
 207             a[(*nra)++]=i;
 208         }
 209     }
 210
 211     return a;
 212 }
 213
 214 typedef struct {
 215   char *rname;
 216   gmx_bool bNeg;
 217   char *gname;
 218 } restp_t;
 219
 220 static void analyse_other(const char ** restype,t_atoms *atoms,
 221                           t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 222 {
 223   restp_t *restp=NULL;
 224   char **attp=NULL;
 225   char *rname,*aname;
 226   atom_id *other_ndx,*aid,*aaid;
 227   int  i,j,k,l,resind,naid,naaid,natp,nrestp=0;
 228
 229     for(i=0; (i<atoms->nres); i++)
 230     {
 231         if (gmx_strcasecmp(restype[i],"Protein") && gmx_strcasecmp(restype[i],"DNA") && gmx_strcasecmp(restype[i],"RNA") && gmx_strcasecmp(restype[i],"Water"))
 232         {
 233             break;
 234         }
 235     }
 236   if (i < atoms->nres) {
 237     /* we have others */
 238     if (bVerb)
 239       printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 240     snew(other_ndx,atoms->nr);
 241     for(k=0; (k<atoms->nr); k++) {
 242       resind = atoms->atom[k].resind;
 243       rname = *atoms->resinfo[resind].name;
 244         if (gmx_strcasecmp(restype[resind],"Protein") && gmx_strcasecmp(restype[resind],"DNA") &&
 245             gmx_strcasecmp(restype[resind],"RNA") && gmx_strcasecmp(restype[resind],"Water"))
 246         {
 247
 248         for(l=0; (l<nrestp); l++)
 249           if (strcmp(restp[l].rname,rname) == 0)
 250             break;
 251         if (l==nrestp) {
 252           srenew(restp,nrestp+1);
 253           restp[nrestp].rname = strdup(rname);
 254           restp[nrestp].bNeg  = FALSE;
 255           restp[nrestp].gname = strdup(rname);
 256           nrestp++;
 257
 258     }
 259       }
 260     }
 261     for(i=0; (i<nrestp); i++) {
 262       snew(aid,atoms->nr);
 263       naid=0;
 264       for(j=0; (j<atoms->nr); j++) {
 265         rname = *atoms->resinfo[atoms->atom[j].resind].name;
 266         if ((strcmp(restp[i].rname,rname) == 0 && !restp[i].bNeg) ||
 267             (strcmp(restp[i].rname,rname) != 0 &&  restp[i].bNeg)) {
 268           aid[naid++] = j;
 269         }
 270       }
 271       add_grp(gb,gn,naid,aid,restp[i].gname);
 272       if (bASK) {
 273         printf("split %s into atoms (y/n) ? ",restp[i].gname);
 274         fflush(stdout);
 275         if (gmx_ask_yesno(bASK)) {
 276           natp=0;
 277           for(k=0; (k<naid); k++) {
 278             aname=*atoms->atomname[aid[k]];
 279             for(l=0; (l<natp); l++)
 280               if (strcmp(aname,attp[l]) == 0)
 281                 break;
 282             if (l == natp) {
 283               srenew(attp,++natp);
 284               attp[natp-1]=aname;
 285             }
 286           }
 287           if (natp > 1) {
 288             for(l=0; (l<natp); l++) {
 289               snew(aaid,naid);
 290               naaid=0;
 291               for(k=0; (k<naid); k++) {
 292                 aname=*atoms->atomname[aid[k]];
 293                 if (strcmp(aname,attp[l])==0)
 294                   aaid[naaid++]=aid[k];
 295               }
 296               add_grp(gb,gn,naaid,aaid,attp[l]);
 297               sfree(aaid);
 298             }
 299           }
 300           sfree(attp);
 301           attp=NULL;
 302         }
 303         sfree(aid);
 304       }
 305     }
 306     sfree(other_ndx);
 307   }
 308 }
 309
 310 /*! /brief Instances of this struct contain the data necessary to
 311  *         construct a single (protein) index group in
 312  *         analyse_prot(). */
 313 typedef struct gmx_help_make_index_group
 314 {
 315   /** The set of atom names that will be used to form this index group */
 316   const char **defining_atomnames;
 317   /** Size of the defining_atomnames array */
 318   const int num_defining_atomnames;
 319   /** Name of this index group */
 320   const char *group_name;
 321   /** Whether the above atom names name the atoms in the group, or
 322       those not in the group */
 323   gmx_bool bTakeComplement;
 324   /** The index in wholename gives the first item in the arrays of
 325      atomnames that should be tested with 'gmx_strncasecmp' in stead of
 326      gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 327      This is comparable to using a '*' wildcard at the end of specific
 328      atom names, but that is more involved to implement...
 329    */
 330   int wholename;
 331   /** Only create this index group if it differs from the one specified in compareto,
 332      where -1 means to always create this group. */
 333   int compareto;
 334 } t_gmx_help_make_index_group;
 335
 336 static void analyse_prot(const char ** restype,t_atoms *atoms,
 337                          t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 338 {
 339   /* lists of atomnames to be used in constructing index groups: */
 340   static const char *pnoh[]    = { "H", "HN" };
 341   static const char *pnodum[]  = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 342                              "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 343   static const char *calpha[]  = { "CA" };
 344   static const char *bb[]      = { "N","CA","C" };
 345   static const char *mc[]      = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT" };
 346   static const char *mcb[]     = { "N","CA","CB","C","O","O1","O2","OC1","OC2","OT","OXT" };
 347   static const char *mch[]     = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT",
 348                                    "H1","H2","H3","H","HN" };
 349
 350   static const t_gmx_help_make_index_group constructing_data[] =
 351     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 352      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 353      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 354      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 355      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 356      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 357      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 358      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 359      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 360      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0},
 361     };
 362   const int num_index_groups = asize(constructing_data);
 363
 364   int     n,j;
 365   atom_id *aid;
 366   int     nra,nnpres,npres;
 367   gmx_bool    match;
 368   char    ndx_name[STRLEN],*atnm;
 369   int i;
 370
 371   if (bVerb)
 372   {
 373     printf("Analysing Protein...\n");
 374   }
 375   snew(aid,atoms->nr);
 376
 377   /* calculate the number of protein residues */
 378   npres=0;
 379   for(i=0; (i<atoms->nres); i++) {
 380     if (0 == gmx_strcasecmp(restype[i],"Protein")) {
 381       npres++;
 382     }
 383   }
 384   /* find matching or complement atoms */
 385   for(i=0; (i<(int)num_index_groups); i++) {
 386     nra=0;
 387     for(n=0; (n<atoms->nr); n++) {
 388       if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind],"Protein")) {
 389         match=FALSE;
 390         for(j=0; (j<constructing_data[i].num_defining_atomnames); j++) {
 391           /* skip digits at beginning of atomname, e.g. 1H */
 392           atnm=*atoms->atomname[n];
 393           while (isdigit(atnm[0])) {
 394             atnm++;
 395           }
 396           if ( (constructing_data[i].wholename==-1) || (j<constructing_data[i].wholename) ) {
 397             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],atnm)) {
 398               match=TRUE;
 399             }
 400           } else {
 401             if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],atnm,strlen(constructing_data[i].defining_atomnames[j]))) {
 402               match=TRUE;
 403             }
 404           }
 405         }
 406         if (constructing_data[i].bTakeComplement != match) {
 407           aid[nra++]=n;
 408         }
 409       }
 410     }
 411     /* if we want to add this group always or it differs from previous
 412        group, add it: */
 413     if ( -1 == constructing_data[i].compareto || !grp_cmp(gb,nra,aid,constructing_data[i].compareto-i) ) {
 414       add_grp(gb,gn,nra,aid,constructing_data[i].group_name);
 415     }
 416   }
 417
 418   if (bASK) {
 419     for(i=0; (i<(int)num_index_groups); i++) {
 420       printf("Split %12s into %5d residues (y/n) ? ",constructing_data[i].group_name,npres);
 421       if (gmx_ask_yesno(bASK)) {
 422         int resind;
 423         nra = 0;
 424         for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 425           resind = atoms->atom[n].resind;
 426           for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++) {
 427             match=FALSE;
 428             for(j=0;(j<constructing_data[i].num_defining_atomnames); j++) {
 429               if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],*atoms->atomname[n])) {
 430                 match=TRUE;
 431               }
 432             }
 433             if (constructing_data[i].bTakeComplement != match) {
 434               aid[nra++]=n;
 435             }
 436           }
 437           /* copy the residuename to the tail of the groupname */
 438           if (nra > 0) {
 439             t_resinfo *ri;
 440             ri = &atoms->resinfo[resind];
 441             sprintf(ndx_name,"%s_%s%d%c",
 442                     constructing_data[i].group_name,*ri->name,ri->nr,ri->ic==' ' ? '\0' : ri->ic);
 443             add_grp(gb,gn,nra,aid,ndx_name);
 444             nra = 0;
 445           }
 446         }
 447       }
 448     }
 449     printf("Make group with sidechain and C=O swapped (y/n) ? ");
 450     if (gmx_ask_yesno(bASK)) {
 451       /* Make swap sidechain C=O index */
 452       int resind,hold;
 453       nra = 0;
 454       for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 455         resind = atoms->atom[n].resind;
 456         hold  = -1;
 457         for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++)
 458           if (strcmp("CA",*atoms->atomname[n]) == 0) {
 459             aid[nra++]=n;
 460             hold=nra;
 461             nra+=2;
 462           } else if (strcmp("C",*atoms->atomname[n]) == 0) {
 463             if (hold == -1) {
 464               gmx_incons("Atom naming problem");
 465             }
 466             aid[hold]=n;
 467           } else if (strcmp("O",*atoms->atomname[n]) == 0) {
 468             if (hold == -1) {
 469               gmx_incons("Atom naming problem");
 470             }
 471             aid[hold+1]=n;
 472           } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
 473             if (hold == -1) {
 474               gmx_incons("Atom naming problem");
 475             }
 476             aid[hold+1]=n;
 477           } else
 478             aid[nra++]=n;
 479       }
 480       /* copy the residuename to the tail of the groupname */
 481       if (nra > 0) {
 482         add_grp(gb,gn,nra,aid,"SwapSC-CO");
 483         nra = 0;
 484       }
 485     }
 486   }
 487   sfree(aid);
 488 }
 489
 490
 491
 492
 493 /* Return 0 if the name was found, otherwise -1.
 494  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 495  */
 496 int
 497 gmx_residuetype_get_type(gmx_residuetype_t rt,const char * resname, const char ** p_restype)
 498 {
 499     int    i,rc;
 500
 501     rc=-1;
 502     for(i=0;i<rt->n && rc;i++)
 503     {
 504         rc=gmx_strcasecmp(rt->resname[i],resname);
 505     }
 506
 507     *p_restype = (rc==0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 508
 509     return rc;
 510 }
 511
 512 int
 513 gmx_residuetype_add(gmx_residuetype_t rt,const char *newresname, const char *newrestype)
 514 {
 515     int     i;
 516     int     found;
 517     const char *  p_oldtype;
 518
 519     found = !gmx_residuetype_get_type(rt,newresname,&p_oldtype);
 520
 521     if(found && gmx_strcasecmp(p_oldtype,newrestype))
 522     {
 523         fprintf(stderr,"Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 524                 newresname,p_oldtype,newrestype);
 525     }
 526
 527     if(found==0)
 528     {
 529         srenew(rt->resname,rt->n+1);
 530         srenew(rt->restype,rt->n+1);
 531         rt->resname[rt->n]=strdup(newresname);
 532         rt->restype[rt->n]=strdup(newrestype);
 533         rt->n++;
 534     }
 535
 536     return 0;
 537 }
 538
 539
 540 int
 541 gmx_residuetype_init(gmx_residuetype_t *prt)
 542 {
 543     FILE *  db;
 544     char    line[STRLEN];
 545     char    resname[STRLEN],restype[STRLEN],dum[STRLEN];
 546     char *  p;
 547     int     i;
 548     struct gmx_residuetype *rt;
 549
 550     snew(rt,1);
 551     *prt=rt;
 552
 553     rt->n        = 0;
 554     rt->resname  = NULL;
 555     rt->restype = NULL;
 556
 557     db=libopen("residuetypes.dat");
 558
 559     while(get_a_line(db,line,STRLEN))
 560     {
 561         strip_comment(line);
 562         trim(line);
 563         if(line[0]!='\0')
 564         {
 565             if(sscanf(line,"%s %s %s",resname,restype,dum)!=2)
 566             {
 567                 gmx_fatal(FARGS,"Incorrect number of columns (2 expected) for line in residuetypes.dat");
 568             }
 569             gmx_residuetype_add(rt,resname,restype);
 570         }
 571     }
 572
 573     fclose(db);
 574
 575     return 0;
 576 }
 577
 578
 579
 580 int
 581 gmx_residuetype_destroy(gmx_residuetype_t rt)
 582 {
 583     int i;
 584
 585     for(i=0;i<rt->n;i++)
 586     {
 587         free(rt->resname[i]);
 588         free(rt->restype[i]);
 589     }
 590     free(rt);
 591
 592     return 0;
 593 }
 594
 595 int
 596 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 597                              const char ***       p_typenames,
 598                              int *                ntypes)
 599 {
 600     int      i,j,n;
 601     int      found;
 602     const char **  my_typename;
 603     char *   p;
 604
 605     n=0;
 606
 607     my_typename=NULL;
 608     for(i=0;i<rt->n;i++)
 609     {
 610         p=rt->restype[i];
 611         found=0;
 612         for(j=0;j<n && !found;j++)
 613         {
 614             found=!gmx_strcasecmp(p,my_typename[j]);
 615         }
 616
 617         if(!found)
 618         {
 619             srenew(my_typename,n+1);
 620             my_typename[n]=p;
 621             n++;
 622         }
 623     }
 624     *ntypes=n;
 625     *p_typenames=my_typename;
 626
 627     return 0;
 628 }
 629
 630
 631
 632 gmx_bool
 633 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 634 {
 635     gmx_bool rc;
 636     const char *p_type;
 637
 638     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 639        gmx_strcasecmp(p_type,"Protein")==0)
 640     {
 641         rc=TRUE;
 642     }
 643     else
 644     {
 645         rc=FALSE;
 646     }
 647     return rc;
 648 }
 649
 650 gmx_bool
 651 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 652 {
 653     gmx_bool rc;
 654     const char *p_type;
 655
 656     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 657        gmx_strcasecmp(p_type,"DNA")==0)
 658     {
 659         rc=TRUE;
 660     }
 661     else
 662     {
 663         rc=FALSE;
 664     }
 665     return rc;
 666 }
 667
 668 gmx_bool
 669 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 670 {
 671     gmx_bool rc;
 672     const char *p_type;
 673
 674     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 675        gmx_strcasecmp(p_type,"RNA")==0)
 676     {
 677         rc=TRUE;
 678     }
 679     else
 680     {
 681         rc=FALSE;
 682     }
 683     return rc;
 684 }
 685
 686 /* Return the size of the arrays */
 687 int
 688 gmx_residuetype_get_size(gmx_residuetype_t rt)
 689 {
 690     return rt->n;
 691 }
 692
 693 /* Search for a residuetype with name resnm within the
 694  * gmx_residuetype database. Return the index if found,
 695  * otherwise -1.
 696  */
 697 int
 698 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 699 {
 700     int i,rc;
 701
 702     rc=-1;
 703     for(i=0;i<rt->n && rc;i++)
 704     {
 705         rc=gmx_strcasecmp(rt->resname[i],resnm);
 706     }
 707
 708     return (0 == rc) ? i-1 : -1;
 709 }
 710
 711 /* Return the name of the residuetype with the given index, or
 712  * NULL if not found. */
 713 const char *
 714 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 715 {
 716   if(index >= 0 && index < rt->n) {
 717     return rt->resname[index];
 718   } else {
 719     return NULL;
 720   }
 721 }
 722
 723
 724
 725 void analyse(t_atoms *atoms,t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 726 {
 727     gmx_residuetype_t rt=NULL;
 728     char    *resnm;
 729     atom_id *aid;
 730     const char **  restype;
 731     int     nra;
 732     int     i,k;
 733     size_t  j;
 734     int     ntypes;
 735     char *  p;
 736     const char ** p_typename;
 737     int     iwater,iion;
 738     int     nwater,nion;
 739     int     found;
 740
 741     if (bVerb)
 742     {
 743         printf("Analysing residue names:\n");
 744     }
 745     /* Create system group, every single atom */
 746     snew(aid,atoms->nr);
 747     for(i=0;i<atoms->nr;i++)
 748     {
 749         aid[i]=i;
 750     }
 751     add_grp(gb,gn,atoms->nr,aid,"System");
 752     sfree(aid);
 753
 754     /* For every residue, get a pointer to the residue type name */
 755     gmx_residuetype_init(&rt);
 756     assert(rt);
 757
 758     snew(restype,atoms->nres);
 759     ntypes = 0;
 760     p_typename = NULL;
 761     for(i=0;i<atoms->nres;i++)
 762     {
 763         resnm = *atoms->resinfo[i].name;
 764         gmx_residuetype_get_type(rt,resnm,&(restype[i]));
 765
 766         /* Note that this does not lead to a N*N loop, but N*K, where
 767          * K is the number of residue _types_, which is small and independent of N.
 768          */
 769         found = 0;
 770         for(k=0;k<ntypes && !found;k++)
 771         {
 772             found = !strcmp(restype[i],p_typename[k]);
 773         }
 774         if(!found)
 775         {
 776             srenew(p_typename,ntypes+1);
 777             p_typename[ntypes++] = strdup(restype[i]);
 778         }
 779     }
 780
 781     if (bVerb)
 782     {
 783         p_status(restype,atoms->nres,p_typename,ntypes);
 784     }
 785
 786     for(k=0;k<ntypes;k++)
 787     {
 788         aid=mk_aid(atoms,restype,p_typename[k],&nra,TRUE);
 789
 790         /* Check for special types to do fancy stuff with */
 791
 792         if(!gmx_strcasecmp(p_typename[k],"Protein") && nra>0)
 793         {
 794             sfree(aid);
 795             /* PROTEIN */
 796             analyse_prot(restype,atoms,gb,gn,bASK,bVerb);
 797
 798             /* Create a Non-Protein group */
 799             aid=mk_aid(atoms,restype,"Protein",&nra,FALSE);
 800             if ((nra > 0) && (nra < atoms->nr))
 801             {
 802                 add_grp(gb,gn,nra,aid,"non-Protein");
 803             }
 804             sfree(aid);
 805         }
 806         else if(!gmx_strcasecmp(p_typename[k],"Water") && nra>0)
 807         {
 808             add_grp(gb,gn,nra,aid,p_typename[k]);
 809             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 810             add_grp(gb,gn,nra,aid,"SOL");
 811
 812             sfree(aid);
 813
 814             /* Solvent, create a negated group too */
 815             aid=mk_aid(atoms,restype,"Water",&nra,FALSE);
 816             if ((nra > 0) && (nra < atoms->nr))
 817             {
 818                 add_grp(gb,gn,nra,aid,"non-Water");
 819             }
 820             sfree(aid);
 821         }
 822         else if(nra>0)
 823         {
 824             /* Other groups */
 825             add_grp(gb,gn,nra,aid,p_typename[k]);
 826             sfree(aid);
 827             analyse_other(restype,atoms,gb,gn,bASK,bVerb);
 828         }
 829     }
 830
 831     sfree(p_typename);
 832     sfree(restype);
 833     gmx_residuetype_destroy(rt);
 834
 835     /* Create a merged water_and_ions group */
 836     iwater = -1;
 837     iion   = -1;
 838     nwater = 0;
 839     nion   = 0;
 840
 841     for(i=0;i<gb->nr;i++)
 842     {
 843         if(!gmx_strcasecmp((*gn)[i],"Water"))
 844         {
 845             iwater = i;
 846             nwater = gb->index[i+1]-gb->index[i];
 847         }
 848         else if(!gmx_strcasecmp((*gn)[i],"Ion"))
 849         {
 850             iion = i;
 851             nion = gb->index[i+1]-gb->index[i];
 852         }
 853     }
 854
 855     if(nwater>0 && nion>0)
 856     {
 857         srenew(gb->index,gb->nr+2);
 858         srenew(*gn,gb->nr+1);
 859         (*gn)[gb->nr] = strdup("Water_and_ions");
 860         srenew(gb->a,gb->nra+nwater+nion);
 861         if(nwater>0)
 862         {
 863             for(i=gb->index[iwater];i<gb->index[iwater+1];i++)
 864             {
 865                 gb->a[gb->nra++] = gb->a[i];
 866             }
 867         }
 868         if(nion>0)
 869         {
 870             for(i=gb->index[iion];i<gb->index[iion+1];i++)
 871             {
 872                 gb->a[gb->nra++] = gb->a[i];
 873             }
 874         }
 875         gb->nr++;
 876         gb->index[gb->nr]=gb->nra;
 877     }
 878 }
 879
 880
 881 void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
 882 {
 883   int i;
 884
 885   for(i=0; i<n; i++)
 886     if (index[i] >= natoms)
 887       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 888                   gname ? gname : "Index",i+1, index[i]+1,
 889                   traj ? traj : "the trajectory",natoms);
 890     else if (index[i] < 0)
 891       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is less than zero",
 892                 gname ? gname : "Index",i+1, index[i]+1);
 893 }
 894
 895 t_blocka *init_index(const char *gfile, char ***grpname)
 896 {
 897   FILE     *in;
 898   t_blocka  *b;
 899   int      a,maxentries;
 900   int      i,j,ng,nread;
 901   char     line[STRLEN],*pt,str[STRLEN];
 902
 903   in=gmx_fio_fopen(gfile,"r");
 904   snew(b,1);
 905   get_a_line(in,line,STRLEN);
 906   if ( line[0]=='[' ) {
 907     /* new format */
 908     b->nr=0;
 909     b->index=NULL;
 910     b->nra=0;
 911     b->a=NULL;
 912     *grpname=NULL;
 913     maxentries=0;
 914     do {
 915       if (get_header(line,str)) {
 916         b->nr++;
 917         srenew(b->index,b->nr+1);
 918         srenew(*grpname,b->nr);
 919         if (b->nr==1)
 920           b->index[0]=0;
 921         b->index[b->nr]=b->index[b->nr-1];
 922         (*grpname)[b->nr-1]=strdup(str);
 923       } else {
 924         pt=line;
 925         while (sscanf(pt,"%s",str) == 1) {
 926           i=b->index[b->nr];
 927           if (i>=maxentries) {
 928             maxentries+=1024;
 929             srenew(b->a,maxentries);
 930           }
 931           b->a[i]=strtol(str, NULL, 10)-1;
 932           b->index[b->nr]++;
 933           (b->nra)++;
 934           pt=strstr(pt,str)+strlen(str);
 935         }
 936       }
 937     } while (get_a_line(in,line,STRLEN));
 938   }
 939   else {
 940     /* old format */
 941     sscanf(line,"%d%d",&b->nr,&b->nra);
 942     snew(b->index,b->nr+1);
 943     snew(*grpname,b->nr);
 944     b->index[0]=0;
 945     snew(b->a,b->nra);
 946     for (i=0; (i<b->nr); i++) {
 947       nread=fscanf(in,"%s%d",str,&ng);
 948       (*grpname)[i]=strdup(str);
 949       b->index[i+1]=b->index[i]+ng;
 950       if (b->index[i+1] > b->nra)
 951         gmx_fatal(FARGS,"Something wrong in your indexfile at group %s",str);
 952       for(j=0; (j<ng); j++) {
 953         nread=fscanf(in,"%d",&a);
 954         b->a[b->index[i]+j]=a;
 955       }
 956     }
 957   }
 958   gmx_fio_fclose(in);
 959
 960   for(i=0; (i<b->nr); i++) {
 961     for(j=b->index[i]; (j<b->index[i+1]); j++) {
 962       if (b->a[j] < 0)
 963         fprintf(stderr,"\nWARNING: negative index %d in group %s\n\n",
 964                 b->a[j],(*grpname)[i]);
 965     }
 966   }
 967
 968   return b;
 969 }
 970
 971 static void minstring(char *str)
 972 {
 973   int i;
 974
 975   for (i=0; (i < (int)strlen(str)); i++)
 976     if (str[i]=='-')
 977       str[i]='_';
 978 }
 979
 980 int find_group(char s[], int ngrps, char **grpname)
 981 {
 982   int aa, i, n;
 983   char string[STRLEN];
 984   gmx_bool bMultiple;
 985
 986   bMultiple = FALSE;
 987   n = strlen(s);
 988   aa=NOTSET;
 989   /* first look for whole name match */
 990   if (aa==NOTSET)
 991     for(i=0; i<ngrps; i++)
 992       if (gmx_strcasecmp_min(s,grpname[i])==0) {
 993         if(aa!=NOTSET)
 994           bMultiple = TRUE;
 995         aa=i;
 996       }
 997   /* second look for first string match */
 998   if (aa==NOTSET)
 999     for(i=0; i<ngrps; i++)
1000       if (gmx_strncasecmp_min(s,grpname[i],n)==0) {
1001         if(aa!=NOTSET)
1002           bMultiple = TRUE;
1003         aa=i;
1004       }
1005   /* last look for arbitrary substring match */
1006   if (aa==NOTSET) {
1007     upstring(s);
1008     minstring(s);
1009     for(i=0; i<ngrps; i++) {
1010       strcpy(string, grpname[i]);
1011       upstring(string);
1012       minstring(string);
1013       if (strstr(string,s)!=NULL) {
1014         if(aa!=NOTSET)
1015           bMultiple = TRUE;
1016         aa=i;
1017       }
1018     }
1019   }
1020   if (bMultiple) {
1021     printf("Error: Multiple groups '%s' selected\n", s);
1022     aa=NOTSET;
1023   }
1024   return aa;
1025 }
1026
1027 static int qgroup(int *a, int ngrps, char **grpname)
1028 {
1029     char s[STRLEN];
1030     int  aa;
1031     gmx_bool bInRange;
1032     char *end;
1033
1034     do {
1035         fprintf(stderr,"Select a group: ");
1036         do {
1037             if ( scanf("%s",s)!=1 )
1038                 gmx_fatal(FARGS,"Cannot read from input");
1039             trim(s); /* remove spaces */
1040         } while (strlen(s)==0);
1041         aa = strtol(s, &end, 10);
1042         if (aa==0 && end[0] != '\0') /* string entered */
1043             aa = find_group(s, ngrps, grpname);
1044         bInRange = (aa >= 0 && aa < ngrps);
1045         if (!bInRange)
1046             printf("Error: No such group '%s'\n", s);
1047     } while (!bInRange);
1048     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1049     *a = aa;
1050     return aa;
1051 }
1052
1053 static void rd_groups(t_blocka *grps,char **grpname,char *gnames[],
1054                       int ngrps,int isize[],atom_id *index[],int grpnr[])
1055 {
1056   int i,j,gnr1;
1057
1058   if (grps->nr==0)
1059     gmx_fatal(FARGS,"Error: no groups in indexfile");
1060   for(i=0; (i<grps->nr); i++)
1061     fprintf(stderr,"Group %5d (%15s) has %5d elements\n",i,grpname[i],
1062            grps->index[i+1]-grps->index[i]);
1063   for(i=0; (i<ngrps); i++) {
1064     if (grps->nr > 1)
1065       do {
1066         gnr1=qgroup(&grpnr[i], grps->nr, grpname);
1067         if ((gnr1<0) || (gnr1>=grps->nr))
1068           fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
1069       } while ((gnr1<0) || (gnr1>=grps->nr));
1070     else {
1071       fprintf(stderr,"There is one group in the index\n");
1072       gnr1=0;
1073     }
1074     gnames[i]=strdup(grpname[gnr1]);
1075     isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
1076     snew(index[i],isize[i]);
1077     for(j=0; (j<isize[i]); j++)
1078       index[i][j]=grps->a[grps->index[gnr1]+j];
1079   }
1080 }
1081
1082 void rd_index(const char *statfile,int ngrps,int isize[],
1083               atom_id *index[],char *grpnames[])
1084 {
1085   char    **gnames;
1086   t_blocka *grps;
1087   int     *grpnr;
1088
1089   snew(grpnr,ngrps);
1090   if (!statfile)
1091     gmx_fatal(FARGS,"No index file specified");
1092   grps=init_index(statfile,&gnames);
1093   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1094 }
1095
1096 void rd_index_nrs(char *statfile,int ngrps,int isize[],
1097                   atom_id *index[],char *grpnames[],int grpnr[])
1098 {
1099   char    **gnames;
1100   t_blocka *grps;
1101
1102   if (!statfile)
1103     gmx_fatal(FARGS,"No index file specified");
1104   grps=init_index(statfile,&gnames);
1105
1106   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1107 }
1108
1109 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1110                int isize[], atom_id *index[],char *grpnames[])
1111 {
1112   char    ***gnames;
1113   t_blocka *grps = NULL;
1114   int     *grpnr;
1115
1116   snew(grpnr,ngrps);
1117   snew(gnames,1);
1118   if (fnm != NULL) {
1119     grps=init_index(fnm,gnames);
1120   }
1121   else if (atoms) {
1122     snew(grps,1);
1123     snew(grps->index,1);
1124     analyse(atoms,grps,gnames,FALSE,FALSE);
1125   }
1126   else
1127     gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1128
1129   rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);
1130 }
1131
1132 t_cluster_ndx *cluster_index(FILE *fplog,const char *ndx)
1133 {
1134   t_cluster_ndx *c;
1135   int i;
1136
1137   snew(c,1);
1138   c->clust     = init_index(ndx,&c->grpname);
1139   c->maxframe = -1;
1140   for(i=0; (i<c->clust->nra); i++)
1141     c->maxframe = max(c->maxframe,c->clust->a[i]);
1142   fprintf(fplog ? fplog : stdout,
1143           "There are %d clusters containing %d structures, highest framenr is %d\n",
1144           c->clust->nr,c->clust->nra,c->maxframe);
1145   if (debug) {
1146     pr_blocka(debug,0,"clust",c->clust,TRUE);
1147     for(i=0; (i<c->clust->nra); i++)
1148       if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1149         gmx_fatal(FARGS,"Range check error for c->clust->a[%d] = %d\n"
1150                   "should be within 0 and %d",i,c->clust->a[i],c->maxframe+1);
1151   }
1152   c->inv_clust=make_invblocka(c->clust,c->maxframe);
1153
1154   return c;
1155 }
1156