src/gromacs/gmxlib/index.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <ctype.h>
  42 #include <string.h>
  43 #include <assert.h>
  44 #include "sysstuff.h"
  45 #include "macros.h"
  46 #include "names.h"
  47 #include "gromacs/utility/cstringutil.h"
  48 #include "main.h"
  49 #include "typedefs.h"
  50 #include "gromacs/utility/smalloc.h"
  51 #include "invblock.h"
  52 #include "macros.h"
  53 #include "index.h"
  54 #include "txtdump.h"
  55
  56 #include "gromacs/fileio/futil.h"
  57 #include "gromacs/fileio/gmxfio.h"
  58 #include "gromacs/fileio/strdb.h"
  59 #include "gmx_fatal.h"
  60
  61 const char gmx_residuetype_undefined[] = "Other";
  62
  63 struct gmx_residuetype
  64 {
  65     int      n;
  66     char **  resname;
  67     char **  restype;
  68
  69 };
  70
  71
  72 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  73 {
  74     char c;
  75
  76     if (bASK)
  77     {
  78         do
  79         {
  80             c = toupper(fgetc(stdin));
  81         }
  82         while ((c != 'Y') && (c != 'N'));
  83
  84         return (c == 'Y');
  85     }
  86     else
  87     {
  88         return FALSE;
  89     }
  90 }
  91
  92 t_blocka *new_blocka(void)
  93 {
  94     t_blocka *block;
  95
  96     snew(block, 1);
  97     snew(block->index, 1);
  98
  99     return block;
 100 }
 101
 102 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
 103 {
 104     FILE *out;
 105     int   i, j, k;
 106
 107     out = gmx_fio_fopen(outf, "w");
 108     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 109     for (i = 0; (i < b->nr); i++)
 110     {
 111         fprintf(out, "[ %s ]\n", gnames[i]);
 112         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 113         {
 114             fprintf(out, "%4d ", b->a[j]+1);
 115             if ((k % 15) == 14)
 116             {
 117                 fprintf(out, "\n");
 118             }
 119         }
 120         fprintf(out, "\n");
 121     }
 122
 123     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 124     if (bDuplicate)
 125     {
 126         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 127         for (i = 0; (i < b->nr); i++)
 128         {
 129             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 130             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 131             {
 132                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 133                 if ((k % 15) == 14)
 134                 {
 135                     fprintf(out, "\n");
 136                 }
 137             }
 138             fprintf(out, "\n");
 139         }
 140     }
 141
 142     gmx_fio_fclose(out);
 143 }
 144
 145 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 146 {
 147     int i;
 148
 149     srenew(b->index, b->nr+2);
 150     srenew(*gnames, b->nr+1);
 151     (*gnames)[b->nr] = strdup(name);
 152
 153     srenew(b->a, b->nra+nra);
 154     for (i = 0; (i < nra); i++)
 155     {
 156         b->a[b->nra++] = a[i];
 157     }
 158     b->nr++;
 159     b->index[b->nr] = b->nra;
 160 }
 161
 162 /* compare index in `a' with group in `b' at `index',
 163    when `index'<0 it is relative to end of `b' */
 164 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 165 {
 166     int i;
 167
 168     if (index < 0)
 169     {
 170         index = b->nr-1+index;
 171     }
 172     if (index >= b->nr)
 173     {
 174         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 175     }
 176     /* compare sizes */
 177     if (nra != b->index[index+1] - b->index[index])
 178     {
 179         return FALSE;
 180     }
 181     for (i = 0; i < nra; i++)
 182     {
 183         if (a[i] != b->a[b->index[index]+i])
 184         {
 185             return FALSE;
 186         }
 187     }
 188     return TRUE;
 189 }
 190
 191 static void
 192 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 193 {
 194     int   i, j;
 195     int   found;
 196
 197     int * counter;
 198
 199     snew(counter, ntypes);
 200     for (i = 0; i < ntypes; i++)
 201     {
 202         counter[i] = 0;
 203     }
 204     for (i = 0; i < nres; i++)
 205     {
 206         found = 0;
 207         for (j = 0; j < ntypes; j++)
 208         {
 209             if (!gmx_strcasecmp(restype[i], typenames[j]))
 210             {
 211                 counter[j]++;
 212             }
 213         }
 214     }
 215
 216     for (i = 0; (i < ntypes); i++)
 217     {
 218         if (counter[i] > 0)
 219         {
 220             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 221         }
 222     }
 223
 224     sfree(counter);
 225 }
 226
 227
 228 atom_id *
 229 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 230 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 231 {
 232     atom_id *a;
 233     int      i;
 234     int      res;
 235
 236     snew(a, atoms->nr);
 237     *nra = 0;
 238     for (i = 0; (i < atoms->nr); i++)
 239     {
 240         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 241         if (bMatch == FALSE)
 242         {
 243             res = !res;
 244         }
 245         if (res)
 246         {
 247             a[(*nra)++] = i;
 248         }
 249     }
 250
 251     return a;
 252 }
 253
 254 typedef struct {
 255     char    *rname;
 256     gmx_bool bNeg;
 257     char    *gname;
 258 } restp_t;
 259
 260 static void analyse_other(const char ** restype, t_atoms *atoms,
 261                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 262 {
 263     restp_t *restp = NULL;
 264     char   **attp  = NULL;
 265     char    *rname, *aname;
 266     atom_id *other_ndx, *aid, *aaid;
 267     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 268
 269     for (i = 0; (i < atoms->nres); i++)
 270     {
 271         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 272         {
 273             break;
 274         }
 275     }
 276     if (i < atoms->nres)
 277     {
 278         /* we have others */
 279         if (bVerb)
 280         {
 281             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 282         }
 283         snew(other_ndx, atoms->nr);
 284         for (k = 0; (k < atoms->nr); k++)
 285         {
 286             resind = atoms->atom[k].resind;
 287             rname  = *atoms->resinfo[resind].name;
 288             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 289                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 290             {
 291
 292                 for (l = 0; (l < nrestp); l++)
 293                 {
 294                     if (strcmp(restp[l].rname, rname) == 0)
 295                     {
 296                         break;
 297                     }
 298                 }
 299                 if (l == nrestp)
 300                 {
 301                     srenew(restp, nrestp+1);
 302                     restp[nrestp].rname = strdup(rname);
 303                     restp[nrestp].bNeg  = FALSE;
 304                     restp[nrestp].gname = strdup(rname);
 305                     nrestp++;
 306
 307                 }
 308             }
 309         }
 310         for (i = 0; (i < nrestp); i++)
 311         {
 312             snew(aid, atoms->nr);
 313             naid = 0;
 314             for (j = 0; (j < atoms->nr); j++)
 315             {
 316                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 317                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 318                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 319                 {
 320                     aid[naid++] = j;
 321                 }
 322             }
 323             add_grp(gb, gn, naid, aid, restp[i].gname);
 324             if (bASK)
 325             {
 326                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 327                 fflush(stdout);
 328                 if (gmx_ask_yesno(bASK))
 329                 {
 330                     natp = 0;
 331                     for (k = 0; (k < naid); k++)
 332                     {
 333                         aname = *atoms->atomname[aid[k]];
 334                         for (l = 0; (l < natp); l++)
 335                         {
 336                             if (strcmp(aname, attp[l]) == 0)
 337                             {
 338                                 break;
 339                             }
 340                         }
 341                         if (l == natp)
 342                         {
 343                             srenew(attp, ++natp);
 344                             attp[natp-1] = aname;
 345                         }
 346                     }
 347                     if (natp > 1)
 348                     {
 349                         for (l = 0; (l < natp); l++)
 350                         {
 351                             snew(aaid, naid);
 352                             naaid = 0;
 353                             for (k = 0; (k < naid); k++)
 354                             {
 355                                 aname = *atoms->atomname[aid[k]];
 356                                 if (strcmp(aname, attp[l]) == 0)
 357                                 {
 358                                     aaid[naaid++] = aid[k];
 359                                 }
 360                             }
 361                             add_grp(gb, gn, naaid, aaid, attp[l]);
 362                             sfree(aaid);
 363                         }
 364                     }
 365                     sfree(attp);
 366                     attp = NULL;
 367                 }
 368                 sfree(aid);
 369             }
 370         }
 371         sfree(other_ndx);
 372     }
 373 }
 374
 375 /*! \brief
 376  * Cata necessary to construct a single (protein) index group in
 377  * analyse_prot().
 378  */
 379 typedef struct gmx_help_make_index_group
 380 {
 381     /** The set of atom names that will be used to form this index group */
 382     const char **defining_atomnames;
 383     /** Size of the defining_atomnames array */
 384     const int    num_defining_atomnames;
 385     /** Name of this index group */
 386     const char  *group_name;
 387     /** Whether the above atom names name the atoms in the group, or
 388         those not in the group */
 389     gmx_bool bTakeComplement;
 390     /** The index in wholename gives the first item in the arrays of
 391        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 392        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 393        This is comparable to using a '*' wildcard at the end of specific
 394        atom names, but that is more involved to implement...
 395      */
 396     int wholename;
 397     /** Only create this index group if it differs from the one specified in compareto,
 398        where -1 means to always create this group. */
 399     int compareto;
 400 } t_gmx_help_make_index_group;
 401
 402 static void analyse_prot(const char ** restype, t_atoms *atoms,
 403                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 404 {
 405     /* lists of atomnames to be used in constructing index groups: */
 406     static const char *pnoh[]    = { "H", "HN" };
 407     static const char *pnodum[]  = {
 408         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 409         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 410     };
 411     static const char *calpha[]  = { "CA" };
 412     static const char *bb[]      = { "N", "CA", "C" };
 413     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 414     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 415     static const char *mch[]     = {
 416         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 417         "H1", "H2", "H3", "H", "HN"
 418     };
 419
 420     static const t_gmx_help_make_index_group constructing_data[] =
 421     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 422      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 423      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 424      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 425      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 426      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 427      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 428      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 429      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 430      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 431     const int   num_index_groups = asize(constructing_data);
 432
 433     int         n, j;
 434     atom_id    *aid;
 435     int         nra, nnpres, npres;
 436     gmx_bool    match;
 437     char        ndx_name[STRLEN], *atnm;
 438     int         i;
 439
 440     if (bVerb)
 441     {
 442         printf("Analysing Protein...\n");
 443     }
 444     snew(aid, atoms->nr);
 445
 446     /* calculate the number of protein residues */
 447     npres = 0;
 448     for (i = 0; (i < atoms->nres); i++)
 449     {
 450         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 451         {
 452             npres++;
 453         }
 454     }
 455     /* find matching or complement atoms */
 456     for (i = 0; (i < (int)num_index_groups); i++)
 457     {
 458         nra = 0;
 459         for (n = 0; (n < atoms->nr); n++)
 460         {
 461             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 462             {
 463                 match = FALSE;
 464                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 465                 {
 466                     /* skip digits at beginning of atomname, e.g. 1H */
 467                     atnm = *atoms->atomname[n];
 468                     while (isdigit(atnm[0]))
 469                     {
 470                         atnm++;
 471                     }
 472                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 473                     {
 474                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 475                         {
 476                             match = TRUE;
 477                         }
 478                     }
 479                     else
 480                     {
 481                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 482                         {
 483                             match = TRUE;
 484                         }
 485                     }
 486                 }
 487                 if (constructing_data[i].bTakeComplement != match)
 488                 {
 489                     aid[nra++] = n;
 490                 }
 491             }
 492         }
 493         /* if we want to add this group always or it differs from previous
 494            group, add it: */
 495         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 496         {
 497             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 498         }
 499     }
 500
 501     if (bASK)
 502     {
 503         for (i = 0; (i < (int)num_index_groups); i++)
 504         {
 505             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 506             if (gmx_ask_yesno(bASK))
 507             {
 508                 int resind;
 509                 nra = 0;
 510                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 511                 {
 512                     resind = atoms->atom[n].resind;
 513                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 514                     {
 515                         match = FALSE;
 516                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 517                         {
 518                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 519                             {
 520                                 match = TRUE;
 521                             }
 522                         }
 523                         if (constructing_data[i].bTakeComplement != match)
 524                         {
 525                             aid[nra++] = n;
 526                         }
 527                     }
 528                     /* copy the residuename to the tail of the groupname */
 529                     if (nra > 0)
 530                     {
 531                         t_resinfo *ri;
 532                         ri = &atoms->resinfo[resind];
 533                         sprintf(ndx_name, "%s_%s%d%c",
 534                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 535                         add_grp(gb, gn, nra, aid, ndx_name);
 536                         nra = 0;
 537                     }
 538                 }
 539             }
 540         }
 541         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 542         if (gmx_ask_yesno(bASK))
 543         {
 544             /* Make swap sidechain C=O index */
 545             int resind, hold;
 546             nra = 0;
 547             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 548             {
 549                 resind = atoms->atom[n].resind;
 550                 hold   = -1;
 551                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 552                 {
 553                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 554                     {
 555                         aid[nra++] = n;
 556                         hold       = nra;
 557                         nra       += 2;
 558                     }
 559                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 560                     {
 561                         if (hold == -1)
 562                         {
 563                             gmx_incons("Atom naming problem");
 564                         }
 565                         aid[hold] = n;
 566                     }
 567                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 568                     {
 569                         if (hold == -1)
 570                         {
 571                             gmx_incons("Atom naming problem");
 572                         }
 573                         aid[hold+1] = n;
 574                     }
 575                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 576                     {
 577                         if (hold == -1)
 578                         {
 579                             gmx_incons("Atom naming problem");
 580                         }
 581                         aid[hold+1] = n;
 582                     }
 583                     else
 584                     {
 585                         aid[nra++] = n;
 586                     }
 587                 }
 588             }
 589             /* copy the residuename to the tail of the groupname */
 590             if (nra > 0)
 591             {
 592                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 593                 nra = 0;
 594             }
 595         }
 596     }
 597     sfree(aid);
 598 }
 599
 600
 601
 602
 603 /* Return 0 if the name was found, otherwise -1.
 604  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 605  */
 606 int
 607 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 608 {
 609     int    i, rc;
 610
 611     rc = -1;
 612     for (i = 0; i < rt->n && rc; i++)
 613     {
 614         rc = gmx_strcasecmp(rt->resname[i], resname);
 615     }
 616
 617     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 618
 619     return rc;
 620 }
 621
 622 int
 623 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 624 {
 625     int           i;
 626     int           found;
 627     const char *  p_oldtype;
 628
 629     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 630
 631     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 632     {
 633         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 634                 newresname, p_oldtype, newrestype);
 635     }
 636
 637     if (found == 0)
 638     {
 639         srenew(rt->resname, rt->n+1);
 640         srenew(rt->restype, rt->n+1);
 641         rt->resname[rt->n] = strdup(newresname);
 642         rt->restype[rt->n] = strdup(newrestype);
 643         rt->n++;
 644     }
 645
 646     return 0;
 647 }
 648
 649
 650 int
 651 gmx_residuetype_init(gmx_residuetype_t *prt)
 652 {
 653     FILE                 *  db;
 654     char                    line[STRLEN];
 655     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 656     char                 *  p;
 657     int                     i;
 658     struct gmx_residuetype *rt;
 659
 660     snew(rt, 1);
 661     *prt = rt;
 662
 663     rt->n        = 0;
 664     rt->resname  = NULL;
 665     rt->restype  = NULL;
 666
 667     db = libopen("residuetypes.dat");
 668
 669     while (get_a_line(db, line, STRLEN))
 670     {
 671         strip_comment(line);
 672         trim(line);
 673         if (line[0] != '\0')
 674         {
 675             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 676             {
 677                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 678             }
 679             gmx_residuetype_add(rt, resname, restype);
 680         }
 681     }
 682
 683     fclose(db);
 684
 685     return 0;
 686 }
 687
 688
 689
 690 int
 691 gmx_residuetype_destroy(gmx_residuetype_t rt)
 692 {
 693     int i;
 694
 695     for (i = 0; i < rt->n; i++)
 696     {
 697         sfree(rt->resname[i]);
 698         sfree(rt->restype[i]);
 699     }
 700     sfree(rt->resname);
 701     sfree(rt->restype);
 702     sfree(rt);
 703
 704     return 0;
 705 }
 706
 707 int
 708 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 709                              const char ***       p_typenames,
 710                              int *                ntypes)
 711 {
 712     int            i, j, n;
 713     int            found;
 714     const char **  my_typename;
 715     char       *   p;
 716
 717     n = 0;
 718
 719     my_typename = NULL;
 720     for (i = 0; i < rt->n; i++)
 721     {
 722         p     = rt->restype[i];
 723         found = 0;
 724         for (j = 0; j < n && !found; j++)
 725         {
 726             found = !gmx_strcasecmp(p, my_typename[j]);
 727         }
 728
 729         if (!found)
 730         {
 731             srenew(my_typename, n+1);
 732             my_typename[n] = p;
 733             n++;
 734         }
 735     }
 736     *ntypes      = n;
 737     *p_typenames = my_typename;
 738
 739     return 0;
 740 }
 741
 742
 743
 744 gmx_bool
 745 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 746 {
 747     gmx_bool    rc;
 748     const char *p_type;
 749
 750     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 751         gmx_strcasecmp(p_type, "Protein") == 0)
 752     {
 753         rc = TRUE;
 754     }
 755     else
 756     {
 757         rc = FALSE;
 758     }
 759     return rc;
 760 }
 761
 762 gmx_bool
 763 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 764 {
 765     gmx_bool    rc;
 766     const char *p_type;
 767
 768     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 769         gmx_strcasecmp(p_type, "DNA") == 0)
 770     {
 771         rc = TRUE;
 772     }
 773     else
 774     {
 775         rc = FALSE;
 776     }
 777     return rc;
 778 }
 779
 780 gmx_bool
 781 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 782 {
 783     gmx_bool    rc;
 784     const char *p_type;
 785
 786     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 787         gmx_strcasecmp(p_type, "RNA") == 0)
 788     {
 789         rc = TRUE;
 790     }
 791     else
 792     {
 793         rc = FALSE;
 794     }
 795     return rc;
 796 }
 797
 798 /* Return the size of the arrays */
 799 int
 800 gmx_residuetype_get_size(gmx_residuetype_t rt)
 801 {
 802     return rt->n;
 803 }
 804
 805 /* Search for a residuetype with name resnm within the
 806  * gmx_residuetype database. Return the index if found,
 807  * otherwise -1.
 808  */
 809 int
 810 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 811 {
 812     int i, rc;
 813
 814     rc = -1;
 815     for (i = 0; i < rt->n && rc; i++)
 816     {
 817         rc = gmx_strcasecmp(rt->resname[i], resnm);
 818     }
 819
 820     return (0 == rc) ? i-1 : -1;
 821 }
 822
 823 /* Return the name of the residuetype with the given index, or
 824  * NULL if not found. */
 825 const char *
 826 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 827 {
 828     if (index >= 0 && index < rt->n)
 829     {
 830         return rt->resname[index];
 831     }
 832     else
 833     {
 834         return NULL;
 835     }
 836 }
 837
 838
 839
 840 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 841 {
 842     gmx_residuetype_t rt = NULL;
 843     char             *resnm;
 844     atom_id          *aid;
 845     const char    **  restype;
 846     int               nra;
 847     int               i, k;
 848     size_t            j;
 849     int               ntypes;
 850     char           *  p;
 851     const char     ** p_typename;
 852     int               iwater, iion;
 853     int               nwater, nion;
 854     int               found;
 855
 856     if (bVerb)
 857     {
 858         printf("Analysing residue names:\n");
 859     }
 860     /* Create system group, every single atom */
 861     snew(aid, atoms->nr);
 862     for (i = 0; i < atoms->nr; i++)
 863     {
 864         aid[i] = i;
 865     }
 866     add_grp(gb, gn, atoms->nr, aid, "System");
 867     sfree(aid);
 868
 869     /* For every residue, get a pointer to the residue type name */
 870     gmx_residuetype_init(&rt);
 871     assert(rt);
 872
 873     snew(restype, atoms->nres);
 874     ntypes     = 0;
 875     p_typename = NULL;
 876     for (i = 0; i < atoms->nres; i++)
 877     {
 878         resnm = *atoms->resinfo[i].name;
 879         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 880
 881         /* Note that this does not lead to a N*N loop, but N*K, where
 882          * K is the number of residue _types_, which is small and independent of N.
 883          */
 884         found = 0;
 885         for (k = 0; k < ntypes && !found; k++)
 886         {
 887             found = !strcmp(restype[i], p_typename[k]);
 888         }
 889         if (!found)
 890         {
 891             srenew(p_typename, ntypes+1);
 892             p_typename[ntypes++] = strdup(restype[i]);
 893         }
 894     }
 895
 896     if (bVerb)
 897     {
 898         p_status(restype, atoms->nres, p_typename, ntypes);
 899     }
 900
 901     for (k = 0; k < ntypes; k++)
 902     {
 903         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 904
 905         /* Check for special types to do fancy stuff with */
 906
 907         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 908         {
 909             sfree(aid);
 910             /* PROTEIN */
 911             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 912
 913             /* Create a Non-Protein group */
 914             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 915             if ((nra > 0) && (nra < atoms->nr))
 916             {
 917                 add_grp(gb, gn, nra, aid, "non-Protein");
 918             }
 919             sfree(aid);
 920         }
 921         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 922         {
 923             add_grp(gb, gn, nra, aid, p_typename[k]);
 924             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 925             add_grp(gb, gn, nra, aid, "SOL");
 926
 927             sfree(aid);
 928
 929             /* Solvent, create a negated group too */
 930             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 931             if ((nra > 0) && (nra < atoms->nr))
 932             {
 933                 add_grp(gb, gn, nra, aid, "non-Water");
 934             }
 935             sfree(aid);
 936         }
 937         else if (nra > 0)
 938         {
 939             /* Other groups */
 940             add_grp(gb, gn, nra, aid, p_typename[k]);
 941             sfree(aid);
 942             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 943         }
 944     }
 945
 946     sfree(p_typename);
 947     sfree(restype);
 948     gmx_residuetype_destroy(rt);
 949
 950     /* Create a merged water_and_ions group */
 951     iwater = -1;
 952     iion   = -1;
 953     nwater = 0;
 954     nion   = 0;
 955
 956     for (i = 0; i < gb->nr; i++)
 957     {
 958         if (!gmx_strcasecmp((*gn)[i], "Water"))
 959         {
 960             iwater = i;
 961             nwater = gb->index[i+1]-gb->index[i];
 962         }
 963         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 964         {
 965             iion = i;
 966             nion = gb->index[i+1]-gb->index[i];
 967         }
 968     }
 969
 970     if (nwater > 0 && nion > 0)
 971     {
 972         srenew(gb->index, gb->nr+2);
 973         srenew(*gn, gb->nr+1);
 974         (*gn)[gb->nr] = strdup("Water_and_ions");
 975         srenew(gb->a, gb->nra+nwater+nion);
 976         if (nwater > 0)
 977         {
 978             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 979             {
 980                 gb->a[gb->nra++] = gb->a[i];
 981             }
 982         }
 983         if (nion > 0)
 984         {
 985             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 986             {
 987                 gb->a[gb->nra++] = gb->a[i];
 988             }
 989         }
 990         gb->nr++;
 991         gb->index[gb->nr] = gb->nra;
 992     }
 993 }
 994
 995
 996 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 997 {
 998     int i;
 999
1000     for (i = 0; i < n; i++)
1001     {
1002         if (index[i] >= natoms)
1003         {
1004             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
1005                       gname ? gname : "Index", i+1, index[i]+1,
1006                       traj ? traj : "the trajectory", natoms);
1007         }
1008         else if (index[i] < 0)
1009         {
1010             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
1011                       gname ? gname : "Index", i+1, index[i]+1);
1012         }
1013     }
1014 }
1015
1016 t_blocka *init_index(const char *gfile, char ***grpname)
1017 {
1018     FILE      *in;
1019     t_blocka  *b;
1020     int        a, maxentries;
1021     int        i, j, ng, nread;
1022     char       line[STRLEN], *pt, str[STRLEN];
1023
1024     in = gmx_fio_fopen(gfile, "r");
1025     snew(b, 1);
1026     get_a_line(in, line, STRLEN);
1027     if (line[0] == '[')
1028     {
1029         /* new format */
1030         b->nr      = 0;
1031         b->index   = NULL;
1032         b->nra     = 0;
1033         b->a       = NULL;
1034         *grpname   = NULL;
1035         maxentries = 0;
1036         do
1037         {
1038             if (get_header(line, str))
1039             {
1040                 b->nr++;
1041                 srenew(b->index, b->nr+1);
1042                 srenew(*grpname, b->nr);
1043                 if (b->nr == 1)
1044                 {
1045                     b->index[0] = 0;
1046                 }
1047                 b->index[b->nr]     = b->index[b->nr-1];
1048                 (*grpname)[b->nr-1] = strdup(str);
1049             }
1050             else
1051             {
1052                 if (b->nr == 0)
1053                 {
1054                     gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1055                 }
1056                 pt = line;
1057                 while (sscanf(pt, "%s", str) == 1)
1058                 {
1059                     i = b->index[b->nr];
1060                     if (i >= maxentries)
1061                     {
1062                         maxentries += 1024;
1063                         srenew(b->a, maxentries);
1064                     }
1065                     b->a[i] = strtol(str, NULL, 10)-1;
1066                     b->index[b->nr]++;
1067                     (b->nra)++;
1068                     pt = strstr(pt, str)+strlen(str);
1069                 }
1070             }
1071         }
1072         while (get_a_line(in, line, STRLEN));
1073     }
1074     else
1075     {
1076         /* old format */
1077         sscanf(line, "%d%d", &b->nr, &b->nra);
1078         snew(b->index, b->nr+1);
1079         snew(*grpname, b->nr);
1080         b->index[0] = 0;
1081         snew(b->a, b->nra);
1082         for (i = 0; (i < b->nr); i++)
1083         {
1084             nread         = fscanf(in, "%s%d", str, &ng);
1085             (*grpname)[i] = strdup(str);
1086             b->index[i+1] = b->index[i]+ng;
1087             if (b->index[i+1] > b->nra)
1088             {
1089                 gmx_fatal(FARGS, "Something wrong in your indexfile at group %s", str);
1090             }
1091             for (j = 0; (j < ng); j++)
1092             {
1093                 nread               = fscanf(in, "%d", &a);
1094                 b->a[b->index[i]+j] = a;
1095             }
1096         }
1097     }
1098     gmx_fio_fclose(in);
1099
1100     for (i = 0; (i < b->nr); i++)
1101     {
1102         for (j = b->index[i]; (j < b->index[i+1]); j++)
1103         {
1104             if (b->a[j] < 0)
1105             {
1106                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1107                         b->a[j], (*grpname)[i]);
1108             }
1109         }
1110     }
1111
1112     return b;
1113 }
1114
1115 static void minstring(char *str)
1116 {
1117     int i;
1118
1119     for (i = 0; (i < (int)strlen(str)); i++)
1120     {
1121         if (str[i] == '-')
1122         {
1123             str[i] = '_';
1124         }
1125     }
1126 }
1127
1128 int find_group(char s[], int ngrps, char **grpname)
1129 {
1130     int      aa, i, n;
1131     char     string[STRLEN];
1132     gmx_bool bMultiple;
1133
1134     bMultiple = FALSE;
1135     n         = strlen(s);
1136     aa        = NOTSET;
1137     /* first look for whole name match */
1138     if (aa == NOTSET)
1139     {
1140         for (i = 0; i < ngrps; i++)
1141         {
1142             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1143             {
1144                 if (aa != NOTSET)
1145                 {
1146                     bMultiple = TRUE;
1147                 }
1148                 aa = i;
1149             }
1150         }
1151     }
1152     /* second look for first string match */
1153     if (aa == NOTSET)
1154     {
1155         for (i = 0; i < ngrps; i++)
1156         {
1157             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1158             {
1159                 if (aa != NOTSET)
1160                 {
1161                     bMultiple = TRUE;
1162                 }
1163                 aa = i;
1164             }
1165         }
1166     }
1167     /* last look for arbitrary substring match */
1168     if (aa == NOTSET)
1169     {
1170         upstring(s);
1171         minstring(s);
1172         for (i = 0; i < ngrps; i++)
1173         {
1174             strcpy(string, grpname[i]);
1175             upstring(string);
1176             minstring(string);
1177             if (strstr(string, s) != NULL)
1178             {
1179                 if (aa != NOTSET)
1180                 {
1181                     bMultiple = TRUE;
1182                 }
1183                 aa = i;
1184             }
1185         }
1186     }
1187     if (bMultiple)
1188     {
1189         printf("Error: Multiple groups '%s' selected\n", s);
1190         aa = NOTSET;
1191     }
1192     return aa;
1193 }
1194
1195 static int qgroup(int *a, int ngrps, char **grpname)
1196 {
1197     char     s[STRLEN];
1198     int      aa;
1199     gmx_bool bInRange;
1200     char    *end;
1201
1202     do
1203     {
1204         fprintf(stderr, "Select a group: ");
1205         do
1206         {
1207             if (scanf("%s", s) != 1)
1208             {
1209                 gmx_fatal(FARGS, "Cannot read from input");
1210             }
1211             trim(s); /* remove spaces */
1212         }
1213         while (strlen(s) == 0);
1214         aa = strtol(s, &end, 10);
1215         if (aa == 0 && end[0] != '\0') /* string entered */
1216         {
1217             aa = find_group(s, ngrps, grpname);
1218         }
1219         bInRange = (aa >= 0 && aa < ngrps);
1220         if (!bInRange)
1221         {
1222             printf("Error: No such group '%s'\n", s);
1223         }
1224     }
1225     while (!bInRange);
1226     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1227     *a = aa;
1228     return aa;
1229 }
1230
1231 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1232                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1233 {
1234     int i, j, gnr1;
1235
1236     if (grps->nr == 0)
1237     {
1238         gmx_fatal(FARGS, "Error: no groups in indexfile");
1239     }
1240     for (i = 0; (i < grps->nr); i++)
1241     {
1242         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1243                 grps->index[i+1]-grps->index[i]);
1244     }
1245     for (i = 0; (i < ngrps); i++)
1246     {
1247         if (grps->nr > 1)
1248         {
1249             do
1250             {
1251                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1252                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1253                 {
1254                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1255                 }
1256             }
1257             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1258         }
1259         else
1260         {
1261             fprintf(stderr, "There is one group in the index\n");
1262             gnr1 = 0;
1263         }
1264         gnames[i] = strdup(grpname[gnr1]);
1265         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1266         snew(index[i], isize[i]);
1267         for (j = 0; (j < isize[i]); j++)
1268         {
1269             index[i][j] = grps->a[grps->index[gnr1]+j];
1270         }
1271     }
1272 }
1273
1274 void rd_index(const char *statfile, int ngrps, int isize[],
1275               atom_id *index[], char *grpnames[])
1276 {
1277     char    **gnames;
1278     t_blocka *grps;
1279     int      *grpnr;
1280
1281     snew(grpnr, ngrps);
1282     if (!statfile)
1283     {
1284         gmx_fatal(FARGS, "No index file specified");
1285     }
1286     grps = init_index(statfile, &gnames);
1287     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1288 }
1289
1290 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1291                   atom_id *index[], char *grpnames[], int grpnr[])
1292 {
1293     char    **gnames;
1294     t_blocka *grps;
1295
1296     if (!statfile)
1297     {
1298         gmx_fatal(FARGS, "No index file specified");
1299     }
1300     grps = init_index(statfile, &gnames);
1301
1302     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1303 }
1304
1305 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1306                int isize[], atom_id *index[], char *grpnames[])
1307 {
1308     char    ***gnames;
1309     t_blocka  *grps = NULL;
1310     int       *grpnr;
1311
1312     snew(grpnr, ngrps);
1313     snew(gnames, 1);
1314     if (fnm != NULL)
1315     {
1316         grps = init_index(fnm, gnames);
1317     }
1318     else if (atoms)
1319     {
1320         snew(grps, 1);
1321         snew(grps->index, 1);
1322         analyse(atoms, grps, gnames, FALSE, FALSE);
1323     }
1324     else
1325     {
1326         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1327     }
1328
1329     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1330 }
1331
1332 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1333 {
1334     t_cluster_ndx *c;
1335     int            i;
1336
1337     snew(c, 1);
1338     c->clust     = init_index(ndx, &c->grpname);
1339     c->maxframe  = -1;
1340     for (i = 0; (i < c->clust->nra); i++)
1341     {
1342         c->maxframe = max(c->maxframe, c->clust->a[i]);
1343     }
1344     fprintf(fplog ? fplog : stdout,
1345             "There are %d clusters containing %d structures, highest framenr is %d\n",
1346             c->clust->nr, c->clust->nra, c->maxframe);
1347     if (debug)
1348     {
1349         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1350         for (i = 0; (i < c->clust->nra); i++)
1351         {
1352             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1353             {
1354                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1355                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1356             }
1357         }
1358     }
1359     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1360
1361     return c;
1362 }