src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gromacs/topology/index.h"
  38
  39 #ifdef HAVE_CONFIG_H
  40 #include <config.h>
  41 #endif
  42
  43 #include <assert.h>
  44 #include <ctype.h>
  45 #include <stdlib.h>
  46 #include <string.h>
  47
  48 #include <algorithm>
  49
  50 #include "gromacs/legacyheaders/macros.h"
  51 #include "gromacs/legacyheaders/txtdump.h"
  52
  53 #include "gromacs/fileio/gmxfio.h"
  54 #include "gromacs/fileio/strdb.h"
  55 #include "gromacs/topology/atoms.h"
  56 #include "gromacs/topology/block.h"
  57 #include "gromacs/topology/invblock.h"
  58 #include "gromacs/utility/cstringutil.h"
  59 #include "gromacs/utility/fatalerror.h"
  60 #include "gromacs/utility/futil.h"
  61 #include "gromacs/utility/smalloc.h"
  62
  63 const char gmx_residuetype_undefined[] = "Other";
  64
  65 struct gmx_residuetype
  66 {
  67     int      n;
  68     char **  resname;
  69     char **  restype;
  70
  71 };
  72
  73
  74 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  75 {
  76     char c;
  77
  78     if (bASK)
  79     {
  80         do
  81         {
  82             c = toupper(fgetc(stdin));
  83         }
  84         while ((c != 'Y') && (c != 'N'));
  85
  86         return (c == 'Y');
  87     }
  88     else
  89     {
  90         return FALSE;
  91     }
  92 }
  93
  94 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  95 {
  96     FILE *out;
  97     int   i, j, k;
  98
  99     out = gmx_fio_fopen(outf, "w");
 100     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 101     for (i = 0; (i < b->nr); i++)
 102     {
 103         fprintf(out, "[ %s ]\n", gnames[i]);
 104         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 105         {
 106             fprintf(out, "%4d ", b->a[j]+1);
 107             if ((k % 15) == 14)
 108             {
 109                 fprintf(out, "\n");
 110             }
 111         }
 112         fprintf(out, "\n");
 113     }
 114
 115     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 116     if (bDuplicate)
 117     {
 118         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 119         for (i = 0; (i < b->nr); i++)
 120         {
 121             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 122             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 123             {
 124                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 125                 if ((k % 15) == 14)
 126                 {
 127                     fprintf(out, "\n");
 128                 }
 129             }
 130             fprintf(out, "\n");
 131         }
 132     }
 133
 134     gmx_fio_fclose(out);
 135 }
 136
 137 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 138 {
 139     int i;
 140
 141     srenew(b->index, b->nr+2);
 142     srenew(*gnames, b->nr+1);
 143     (*gnames)[b->nr] = strdup(name);
 144
 145     srenew(b->a, b->nra+nra);
 146     for (i = 0; (i < nra); i++)
 147     {
 148         b->a[b->nra++] = a[i];
 149     }
 150     b->nr++;
 151     b->index[b->nr] = b->nra;
 152 }
 153
 154 /* compare index in `a' with group in `b' at `index',
 155    when `index'<0 it is relative to end of `b' */
 156 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 157 {
 158     int i;
 159
 160     if (index < 0)
 161     {
 162         index = b->nr-1+index;
 163     }
 164     if (index >= b->nr)
 165     {
 166         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 167     }
 168     /* compare sizes */
 169     if (nra != b->index[index+1] - b->index[index])
 170     {
 171         return FALSE;
 172     }
 173     for (i = 0; i < nra; i++)
 174     {
 175         if (a[i] != b->a[b->index[index]+i])
 176         {
 177             return FALSE;
 178         }
 179     }
 180     return TRUE;
 181 }
 182
 183 static void
 184 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 185 {
 186     int   i, j;
 187     int * counter;
 188
 189     snew(counter, ntypes);
 190     for (i = 0; i < ntypes; i++)
 191     {
 192         counter[i] = 0;
 193     }
 194     for (i = 0; i < nres; i++)
 195     {
 196         for (j = 0; j < ntypes; j++)
 197         {
 198             if (!gmx_strcasecmp(restype[i], typenames[j]))
 199             {
 200                 counter[j]++;
 201             }
 202         }
 203     }
 204
 205     for (i = 0; (i < ntypes); i++)
 206     {
 207         if (counter[i] > 0)
 208         {
 209             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 210         }
 211     }
 212
 213     sfree(counter);
 214 }
 215
 216
 217 static atom_id *
 218 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 219 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 220 {
 221     atom_id *a;
 222     int      i;
 223     int      res;
 224
 225     snew(a, atoms->nr);
 226     *nra = 0;
 227     for (i = 0; (i < atoms->nr); i++)
 228     {
 229         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 230         if (bMatch == FALSE)
 231         {
 232             res = !res;
 233         }
 234         if (res)
 235         {
 236             a[(*nra)++] = i;
 237         }
 238     }
 239
 240     return a;
 241 }
 242
 243 typedef struct {
 244     char    *rname;
 245     gmx_bool bNeg;
 246     char    *gname;
 247 } restp_t;
 248
 249 static void analyse_other(const char ** restype, t_atoms *atoms,
 250                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 251 {
 252     restp_t *restp = NULL;
 253     char   **attp  = NULL;
 254     char    *rname, *aname;
 255     atom_id *other_ndx, *aid, *aaid;
 256     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 257
 258     for (i = 0; (i < atoms->nres); i++)
 259     {
 260         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 261         {
 262             break;
 263         }
 264     }
 265     if (i < atoms->nres)
 266     {
 267         /* we have others */
 268         if (bVerb)
 269         {
 270             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 271         }
 272         snew(other_ndx, atoms->nr);
 273         for (k = 0; (k < atoms->nr); k++)
 274         {
 275             resind = atoms->atom[k].resind;
 276             rname  = *atoms->resinfo[resind].name;
 277             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 278                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 279             {
 280
 281                 for (l = 0; (l < nrestp); l++)
 282                 {
 283                     if (strcmp(restp[l].rname, rname) == 0)
 284                     {
 285                         break;
 286                     }
 287                 }
 288                 if (l == nrestp)
 289                 {
 290                     srenew(restp, nrestp+1);
 291                     restp[nrestp].rname = strdup(rname);
 292                     restp[nrestp].bNeg  = FALSE;
 293                     restp[nrestp].gname = strdup(rname);
 294                     nrestp++;
 295
 296                 }
 297             }
 298         }
 299         for (i = 0; (i < nrestp); i++)
 300         {
 301             snew(aid, atoms->nr);
 302             naid = 0;
 303             for (j = 0; (j < atoms->nr); j++)
 304             {
 305                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 306                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 307                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 308                 {
 309                     aid[naid++] = j;
 310                 }
 311             }
 312             add_grp(gb, gn, naid, aid, restp[i].gname);
 313             if (bASK)
 314             {
 315                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 316                 fflush(stdout);
 317                 if (gmx_ask_yesno(bASK))
 318                 {
 319                     natp = 0;
 320                     for (k = 0; (k < naid); k++)
 321                     {
 322                         aname = *atoms->atomname[aid[k]];
 323                         for (l = 0; (l < natp); l++)
 324                         {
 325                             if (strcmp(aname, attp[l]) == 0)
 326                             {
 327                                 break;
 328                             }
 329                         }
 330                         if (l == natp)
 331                         {
 332                             srenew(attp, ++natp);
 333                             attp[natp-1] = aname;
 334                         }
 335                     }
 336                     if (natp > 1)
 337                     {
 338                         for (l = 0; (l < natp); l++)
 339                         {
 340                             snew(aaid, naid);
 341                             naaid = 0;
 342                             for (k = 0; (k < naid); k++)
 343                             {
 344                                 aname = *atoms->atomname[aid[k]];
 345                                 if (strcmp(aname, attp[l]) == 0)
 346                                 {
 347                                     aaid[naaid++] = aid[k];
 348                                 }
 349                             }
 350                             add_grp(gb, gn, naaid, aaid, attp[l]);
 351                             sfree(aaid);
 352                         }
 353                     }
 354                     sfree(attp);
 355                     attp = NULL;
 356                 }
 357                 sfree(aid);
 358             }
 359         }
 360         sfree(other_ndx);
 361     }
 362 }
 363
 364 /*! \brief
 365  * Cata necessary to construct a single (protein) index group in
 366  * analyse_prot().
 367  */
 368 typedef struct gmx_help_make_index_group
 369 {
 370     /** The set of atom names that will be used to form this index group */
 371     const char **defining_atomnames;
 372     /** Size of the defining_atomnames array */
 373     int          num_defining_atomnames;
 374     /** Name of this index group */
 375     const char  *group_name;
 376     /** Whether the above atom names name the atoms in the group, or
 377         those not in the group */
 378     gmx_bool bTakeComplement;
 379     /** The index in wholename gives the first item in the arrays of
 380        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 381        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 382        This is comparable to using a '*' wildcard at the end of specific
 383        atom names, but that is more involved to implement...
 384      */
 385     int wholename;
 386     /** Only create this index group if it differs from the one specified in compareto,
 387        where -1 means to always create this group. */
 388     int compareto;
 389 } t_gmx_help_make_index_group;
 390
 391 static void analyse_prot(const char ** restype, t_atoms *atoms,
 392                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 393 {
 394     /* lists of atomnames to be used in constructing index groups: */
 395     static const char *pnoh[]    = { "H", "HN" };
 396     static const char *pnodum[]  = {
 397         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 398         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 399     };
 400     static const char *calpha[]  = { "CA" };
 401     static const char *bb[]      = { "N", "CA", "C" };
 402     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 403     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 404     static const char *mch[]     = {
 405         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 406         "H1", "H2", "H3", "H", "HN"
 407     };
 408
 409     static const t_gmx_help_make_index_group constructing_data[] =
 410     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 411      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 412      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 413      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 414      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 415      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 416      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 417      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 418      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 419      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 420     const int   num_index_groups = asize(constructing_data);
 421
 422     int         n, j;
 423     atom_id    *aid;
 424     int         nra, npres;
 425     gmx_bool    match;
 426     char        ndx_name[STRLEN], *atnm;
 427     int         i;
 428
 429     if (bVerb)
 430     {
 431         printf("Analysing Protein...\n");
 432     }
 433     snew(aid, atoms->nr);
 434
 435     /* calculate the number of protein residues */
 436     npres = 0;
 437     for (i = 0; (i < atoms->nres); i++)
 438     {
 439         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 440         {
 441             npres++;
 442         }
 443     }
 444     /* find matching or complement atoms */
 445     for (i = 0; (i < (int)num_index_groups); i++)
 446     {
 447         nra = 0;
 448         for (n = 0; (n < atoms->nr); n++)
 449         {
 450             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 451             {
 452                 match = FALSE;
 453                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 454                 {
 455                     /* skip digits at beginning of atomname, e.g. 1H */
 456                     atnm = *atoms->atomname[n];
 457                     while (isdigit(atnm[0]))
 458                     {
 459                         atnm++;
 460                     }
 461                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 462                     {
 463                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 464                         {
 465                             match = TRUE;
 466                         }
 467                     }
 468                     else
 469                     {
 470                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 471                         {
 472                             match = TRUE;
 473                         }
 474                     }
 475                 }
 476                 if (constructing_data[i].bTakeComplement != match)
 477                 {
 478                     aid[nra++] = n;
 479                 }
 480             }
 481         }
 482         /* if we want to add this group always or it differs from previous
 483            group, add it: */
 484         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 485         {
 486             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 487         }
 488     }
 489
 490     if (bASK)
 491     {
 492         for (i = 0; (i < (int)num_index_groups); i++)
 493         {
 494             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 495             if (gmx_ask_yesno(bASK))
 496             {
 497                 int resind;
 498                 nra = 0;
 499                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 500                 {
 501                     resind = atoms->atom[n].resind;
 502                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 503                     {
 504                         match = FALSE;
 505                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 506                         {
 507                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 508                             {
 509                                 match = TRUE;
 510                             }
 511                         }
 512                         if (constructing_data[i].bTakeComplement != match)
 513                         {
 514                             aid[nra++] = n;
 515                         }
 516                     }
 517                     /* copy the residuename to the tail of the groupname */
 518                     if (nra > 0)
 519                     {
 520                         t_resinfo *ri;
 521                         ri = &atoms->resinfo[resind];
 522                         sprintf(ndx_name, "%s_%s%d%c",
 523                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 524                         add_grp(gb, gn, nra, aid, ndx_name);
 525                         nra = 0;
 526                     }
 527                 }
 528             }
 529         }
 530         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 531         if (gmx_ask_yesno(bASK))
 532         {
 533             /* Make swap sidechain C=O index */
 534             int resind, hold;
 535             nra = 0;
 536             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 537             {
 538                 resind = atoms->atom[n].resind;
 539                 hold   = -1;
 540                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 541                 {
 542                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 543                     {
 544                         aid[nra++] = n;
 545                         hold       = nra;
 546                         nra       += 2;
 547                     }
 548                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 549                     {
 550                         if (hold == -1)
 551                         {
 552                             gmx_incons("Atom naming problem");
 553                         }
 554                         aid[hold] = n;
 555                     }
 556                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 557                     {
 558                         if (hold == -1)
 559                         {
 560                             gmx_incons("Atom naming problem");
 561                         }
 562                         aid[hold+1] = n;
 563                     }
 564                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 565                     {
 566                         if (hold == -1)
 567                         {
 568                             gmx_incons("Atom naming problem");
 569                         }
 570                         aid[hold+1] = n;
 571                     }
 572                     else
 573                     {
 574                         aid[nra++] = n;
 575                     }
 576                 }
 577             }
 578             /* copy the residuename to the tail of the groupname */
 579             if (nra > 0)
 580             {
 581                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 582             }
 583         }
 584     }
 585     sfree(aid);
 586 }
 587
 588
 589
 590
 591 /* Return 0 if the name was found, otherwise -1.
 592  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 593  */
 594 int
 595 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 596 {
 597     int    i, rc;
 598
 599     rc = -1;
 600     for (i = 0; i < rt->n && rc; i++)
 601     {
 602         rc = gmx_strcasecmp(rt->resname[i], resname);
 603     }
 604
 605     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 606
 607     return rc;
 608 }
 609
 610 int
 611 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 612 {
 613     int           found;
 614     const char *  p_oldtype;
 615
 616     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 617
 618     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 619     {
 620         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 621                 newresname, p_oldtype, newrestype);
 622     }
 623
 624     if (found == 0)
 625     {
 626         srenew(rt->resname, rt->n+1);
 627         srenew(rt->restype, rt->n+1);
 628         rt->resname[rt->n] = strdup(newresname);
 629         rt->restype[rt->n] = strdup(newrestype);
 630         rt->n++;
 631     }
 632
 633     return 0;
 634 }
 635
 636
 637 int
 638 gmx_residuetype_init(gmx_residuetype_t *prt)
 639 {
 640     FILE                 *  db;
 641     char                    line[STRLEN];
 642     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 643     struct gmx_residuetype *rt;
 644
 645     snew(rt, 1);
 646     *prt = rt;
 647
 648     rt->n        = 0;
 649     rt->resname  = NULL;
 650     rt->restype  = NULL;
 651
 652     db = libopen("residuetypes.dat");
 653
 654     while (get_a_line(db, line, STRLEN))
 655     {
 656         strip_comment(line);
 657         trim(line);
 658         if (line[0] != '\0')
 659         {
 660             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 661             {
 662                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 663             }
 664             gmx_residuetype_add(rt, resname, restype);
 665         }
 666     }
 667
 668     fclose(db);
 669
 670     return 0;
 671 }
 672
 673
 674
 675 int
 676 gmx_residuetype_destroy(gmx_residuetype_t rt)
 677 {
 678     int i;
 679
 680     for (i = 0; i < rt->n; i++)
 681     {
 682         sfree(rt->resname[i]);
 683         sfree(rt->restype[i]);
 684     }
 685     sfree(rt->resname);
 686     sfree(rt->restype);
 687     sfree(rt);
 688
 689     return 0;
 690 }
 691
 692 int
 693 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 694                              const char ***       p_typenames,
 695                              int *                ntypes)
 696 {
 697     int            i, n;
 698     const char **  my_typename;
 699
 700     n           = 0;
 701     my_typename = NULL;
 702     for (i = 0; i < rt->n; i++)
 703     {
 704         const char *const p      = rt->restype[i];
 705         bool              bFound = false;
 706         for (int j = 0; j < n && !bFound; j++)
 707         {
 708             assert(my_typename != NULL);
 709             bFound = !gmx_strcasecmp(p, my_typename[j]);
 710         }
 711         if (!bFound)
 712         {
 713             srenew(my_typename, n+1);
 714             my_typename[n] = p;
 715             n++;
 716         }
 717     }
 718     *ntypes      = n;
 719     *p_typenames = my_typename;
 720
 721     return 0;
 722 }
 723
 724
 725
 726 gmx_bool
 727 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 728 {
 729     gmx_bool    rc;
 730     const char *p_type;
 731
 732     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 733         gmx_strcasecmp(p_type, "Protein") == 0)
 734     {
 735         rc = TRUE;
 736     }
 737     else
 738     {
 739         rc = FALSE;
 740     }
 741     return rc;
 742 }
 743
 744 gmx_bool
 745 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 746 {
 747     gmx_bool    rc;
 748     const char *p_type;
 749
 750     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 751         gmx_strcasecmp(p_type, "DNA") == 0)
 752     {
 753         rc = TRUE;
 754     }
 755     else
 756     {
 757         rc = FALSE;
 758     }
 759     return rc;
 760 }
 761
 762 gmx_bool
 763 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 764 {
 765     gmx_bool    rc;
 766     const char *p_type;
 767
 768     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 769         gmx_strcasecmp(p_type, "RNA") == 0)
 770     {
 771         rc = TRUE;
 772     }
 773     else
 774     {
 775         rc = FALSE;
 776     }
 777     return rc;
 778 }
 779
 780 /* Return the size of the arrays */
 781 int
 782 gmx_residuetype_get_size(gmx_residuetype_t rt)
 783 {
 784     return rt->n;
 785 }
 786
 787 /* Search for a residuetype with name resnm within the
 788  * gmx_residuetype database. Return the index if found,
 789  * otherwise -1.
 790  */
 791 int
 792 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 793 {
 794     int i, rc;
 795
 796     rc = -1;
 797     for (i = 0; i < rt->n && rc; i++)
 798     {
 799         rc = gmx_strcasecmp(rt->resname[i], resnm);
 800     }
 801
 802     return (0 == rc) ? i-1 : -1;
 803 }
 804
 805 /* Return the name of the residuetype with the given index, or
 806  * NULL if not found. */
 807 const char *
 808 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 809 {
 810     if (index >= 0 && index < rt->n)
 811     {
 812         return rt->resname[index];
 813     }
 814     else
 815     {
 816         return NULL;
 817     }
 818 }
 819
 820
 821
 822 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 823 {
 824     gmx_residuetype_t rt = NULL;
 825     char             *resnm;
 826     atom_id          *aid;
 827     const char    **  restype;
 828     int               nra;
 829     int               i, k;
 830     int               ntypes;
 831     const char     ** p_typename;
 832     int               iwater, iion;
 833     int               nwater, nion;
 834     int               found;
 835
 836     if (bVerb)
 837     {
 838         printf("Analysing residue names:\n");
 839     }
 840     /* Create system group, every single atom */
 841     snew(aid, atoms->nr);
 842     for (i = 0; i < atoms->nr; i++)
 843     {
 844         aid[i] = i;
 845     }
 846     add_grp(gb, gn, atoms->nr, aid, "System");
 847     sfree(aid);
 848
 849     /* For every residue, get a pointer to the residue type name */
 850     gmx_residuetype_init(&rt);
 851     assert(rt);
 852
 853     snew(restype, atoms->nres);
 854     ntypes     = 0;
 855     p_typename = NULL;
 856     for (i = 0; i < atoms->nres; i++)
 857     {
 858         resnm = *atoms->resinfo[i].name;
 859         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 860
 861         /* Note that this does not lead to a N*N loop, but N*K, where
 862          * K is the number of residue _types_, which is small and independent of N.
 863          */
 864         found = 0;
 865         for (k = 0; k < ntypes && !found; k++)
 866         {
 867             assert(p_typename != NULL);
 868             found = !strcmp(restype[i], p_typename[k]);
 869         }
 870         if (!found)
 871         {
 872             srenew(p_typename, ntypes+1);
 873             p_typename[ntypes++] = strdup(restype[i]);
 874         }
 875     }
 876
 877     if (bVerb)
 878     {
 879         p_status(restype, atoms->nres, p_typename, ntypes);
 880     }
 881
 882     for (k = 0; k < ntypes; k++)
 883     {
 884         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 885
 886         /* Check for special types to do fancy stuff with */
 887
 888         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 889         {
 890             sfree(aid);
 891             /* PROTEIN */
 892             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 893
 894             /* Create a Non-Protein group */
 895             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 896             if ((nra > 0) && (nra < atoms->nr))
 897             {
 898                 add_grp(gb, gn, nra, aid, "non-Protein");
 899             }
 900             sfree(aid);
 901         }
 902         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 903         {
 904             add_grp(gb, gn, nra, aid, p_typename[k]);
 905             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 906             add_grp(gb, gn, nra, aid, "SOL");
 907
 908             sfree(aid);
 909
 910             /* Solvent, create a negated group too */
 911             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 912             if ((nra > 0) && (nra < atoms->nr))
 913             {
 914                 add_grp(gb, gn, nra, aid, "non-Water");
 915             }
 916             sfree(aid);
 917         }
 918         else if (nra > 0)
 919         {
 920             /* Other groups */
 921             add_grp(gb, gn, nra, aid, p_typename[k]);
 922             sfree(aid);
 923             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 924         }
 925     }
 926
 927     sfree(p_typename);
 928     sfree(restype);
 929     gmx_residuetype_destroy(rt);
 930
 931     /* Create a merged water_and_ions group */
 932     iwater = -1;
 933     iion   = -1;
 934     nwater = 0;
 935     nion   = 0;
 936
 937     for (i = 0; i < gb->nr; i++)
 938     {
 939         if (!gmx_strcasecmp((*gn)[i], "Water"))
 940         {
 941             iwater = i;
 942             nwater = gb->index[i+1]-gb->index[i];
 943         }
 944         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 945         {
 946             iion = i;
 947             nion = gb->index[i+1]-gb->index[i];
 948         }
 949     }
 950
 951     if (nwater > 0 && nion > 0)
 952     {
 953         srenew(gb->index, gb->nr+2);
 954         srenew(*gn, gb->nr+1);
 955         (*gn)[gb->nr] = strdup("Water_and_ions");
 956         srenew(gb->a, gb->nra+nwater+nion);
 957         if (nwater > 0)
 958         {
 959             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 960             {
 961                 gb->a[gb->nra++] = gb->a[i];
 962             }
 963         }
 964         if (nion > 0)
 965         {
 966             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 967             {
 968                 gb->a[gb->nra++] = gb->a[i];
 969             }
 970         }
 971         gb->nr++;
 972         gb->index[gb->nr] = gb->nra;
 973     }
 974 }
 975
 976
 977 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 978 {
 979     int i;
 980
 981     for (i = 0; i < n; i++)
 982     {
 983         if (index[i] >= natoms)
 984         {
 985             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 986                       gname ? gname : "Index", i+1, index[i]+1,
 987                       traj ? traj : "the trajectory", natoms);
 988         }
 989         else if (index[i] < 0)
 990         {
 991             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 992                       gname ? gname : "Index", i+1, index[i]+1);
 993         }
 994     }
 995 }
 996
 997 t_blocka *init_index(const char *gfile, char ***grpname)
 998 {
 999     FILE      *in;
1000     t_blocka  *b;
1001     int        maxentries;
1002     int        i, j;
1003     char       line[STRLEN], *pt, str[STRLEN];
1004
1005     in = gmx_fio_fopen(gfile, "r");
1006     snew(b, 1);
1007     b->nr      = 0;
1008     b->index   = NULL;
1009     b->nra     = 0;
1010     b->a       = NULL;
1011     *grpname   = NULL;
1012     maxentries = 0;
1013     while (get_a_line(in, line, STRLEN))
1014     {
1015         if (get_header(line, str))
1016         {
1017             b->nr++;
1018             srenew(b->index, b->nr+1);
1019             srenew(*grpname, b->nr);
1020             if (b->nr == 1)
1021             {
1022                 b->index[0] = 0;
1023             }
1024             b->index[b->nr]     = b->index[b->nr-1];
1025             (*grpname)[b->nr-1] = strdup(str);
1026         }
1027         else
1028         {
1029             if (b->nr == 0)
1030             {
1031                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1032             }
1033             pt = line;
1034             while (sscanf(pt, "%s", str) == 1)
1035             {
1036                 i = b->index[b->nr];
1037                 if (i >= maxentries)
1038                 {
1039                     maxentries += 1024;
1040                     srenew(b->a, maxentries);
1041                 }
1042                 assert(b->a != NULL); // for clang analyzer
1043                 b->a[i] = strtol(str, NULL, 10)-1;
1044                 b->index[b->nr]++;
1045                 (b->nra)++;
1046                 pt = strstr(pt, str)+strlen(str);
1047             }
1048         }
1049     }
1050     gmx_fio_fclose(in);
1051
1052     for (i = 0; (i < b->nr); i++)
1053     {
1054         assert(b->a != NULL); // for clang analyzer
1055         for (j = b->index[i]; (j < b->index[i+1]); j++)
1056         {
1057             if (b->a[j] < 0)
1058             {
1059                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1060                         b->a[j], (*grpname)[i]);
1061             }
1062         }
1063     }
1064
1065     return b;
1066 }
1067
1068 static void minstring(char *str)
1069 {
1070     int i;
1071
1072     for (i = 0; (i < (int)strlen(str)); i++)
1073     {
1074         if (str[i] == '-')
1075         {
1076             str[i] = '_';
1077         }
1078     }
1079 }
1080
1081 int find_group(char s[], int ngrps, char **grpname)
1082 {
1083     int      aa, i, n;
1084     char     string[STRLEN];
1085     gmx_bool bMultiple;
1086
1087     bMultiple = FALSE;
1088     n         = strlen(s);
1089     aa        = NOTSET;
1090     /* first look for whole name match */
1091     if (aa == NOTSET)
1092     {
1093         for (i = 0; i < ngrps; i++)
1094         {
1095             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1096             {
1097                 if (aa != NOTSET)
1098                 {
1099                     bMultiple = TRUE;
1100                 }
1101                 aa = i;
1102             }
1103         }
1104     }
1105     /* second look for first string match */
1106     if (aa == NOTSET)
1107     {
1108         for (i = 0; i < ngrps; i++)
1109         {
1110             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1111             {
1112                 if (aa != NOTSET)
1113                 {
1114                     bMultiple = TRUE;
1115                 }
1116                 aa = i;
1117             }
1118         }
1119     }
1120     /* last look for arbitrary substring match */
1121     if (aa == NOTSET)
1122     {
1123         upstring(s);
1124         minstring(s);
1125         for (i = 0; i < ngrps; i++)
1126         {
1127             strcpy(string, grpname[i]);
1128             upstring(string);
1129             minstring(string);
1130             if (strstr(string, s) != NULL)
1131             {
1132                 if (aa != NOTSET)
1133                 {
1134                     bMultiple = TRUE;
1135                 }
1136                 aa = i;
1137             }
1138         }
1139     }
1140     if (bMultiple)
1141     {
1142         printf("Error: Multiple groups '%s' selected\n", s);
1143         aa = NOTSET;
1144     }
1145     return aa;
1146 }
1147
1148 static int qgroup(int *a, int ngrps, char **grpname)
1149 {
1150     char     s[STRLEN];
1151     int      aa;
1152     gmx_bool bInRange;
1153     char    *end;
1154
1155     do
1156     {
1157         fprintf(stderr, "Select a group: ");
1158         do
1159         {
1160             if (scanf("%s", s) != 1)
1161             {
1162                 gmx_fatal(FARGS, "Cannot read from input");
1163             }
1164             trim(s); /* remove spaces */
1165         }
1166         while (strlen(s) == 0);
1167         aa = strtol(s, &end, 10);
1168         if (aa == 0 && end[0] != '\0') /* string entered */
1169         {
1170             aa = find_group(s, ngrps, grpname);
1171         }
1172         bInRange = (aa >= 0 && aa < ngrps);
1173         if (!bInRange)
1174         {
1175             printf("Error: No such group '%s'\n", s);
1176         }
1177     }
1178     while (!bInRange);
1179     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1180     *a = aa;
1181     return aa;
1182 }
1183
1184 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1185                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1186 {
1187     int i, j, gnr1;
1188
1189     if (grps->nr == 0)
1190     {
1191         gmx_fatal(FARGS, "Error: no groups in indexfile");
1192     }
1193     for (i = 0; (i < grps->nr); i++)
1194     {
1195         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1196                 grps->index[i+1]-grps->index[i]);
1197     }
1198     for (i = 0; (i < ngrps); i++)
1199     {
1200         if (grps->nr > 1)
1201         {
1202             do
1203             {
1204                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1205                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1206                 {
1207                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1208                 }
1209             }
1210             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1211         }
1212         else
1213         {
1214             fprintf(stderr, "There is one group in the index\n");
1215             gnr1 = 0;
1216         }
1217         gnames[i] = strdup(grpname[gnr1]);
1218         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1219         snew(index[i], isize[i]);
1220         for (j = 0; (j < isize[i]); j++)
1221         {
1222             index[i][j] = grps->a[grps->index[gnr1]+j];
1223         }
1224     }
1225 }
1226
1227 void rd_index(const char *statfile, int ngrps, int isize[],
1228               atom_id *index[], char *grpnames[])
1229 {
1230     char    **gnames;
1231     t_blocka *grps;
1232     int      *grpnr;
1233
1234     snew(grpnr, ngrps);
1235     if (!statfile)
1236     {
1237         gmx_fatal(FARGS, "No index file specified");
1238     }
1239     grps = init_index(statfile, &gnames);
1240     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1241 }
1242
1243 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1244                   atom_id *index[], char *grpnames[], int grpnr[])
1245 {
1246     char    **gnames;
1247     t_blocka *grps;
1248
1249     if (!statfile)
1250     {
1251         gmx_fatal(FARGS, "No index file specified");
1252     }
1253     grps = init_index(statfile, &gnames);
1254
1255     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1256 }
1257
1258 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1259                int isize[], atom_id *index[], char *grpnames[])
1260 {
1261     char    ***gnames;
1262     t_blocka  *grps = NULL;
1263     int       *grpnr;
1264
1265     snew(grpnr, ngrps);
1266     snew(gnames, 1);
1267     if (fnm != NULL)
1268     {
1269         grps = init_index(fnm, gnames);
1270     }
1271     else if (atoms)
1272     {
1273         snew(grps, 1);
1274         snew(grps->index, 1);
1275         analyse(atoms, grps, gnames, FALSE, FALSE);
1276     }
1277     else
1278     {
1279         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1280     }
1281
1282     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1283 }
1284
1285 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1286 {
1287     t_cluster_ndx *c;
1288     int            i;
1289
1290     snew(c, 1);
1291     c->clust     = init_index(ndx, &c->grpname);
1292     c->maxframe  = -1;
1293     for (i = 0; (i < c->clust->nra); i++)
1294     {
1295         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1296     }
1297     fprintf(fplog ? fplog : stdout,
1298             "There are %d clusters containing %d structures, highest framenr is %d\n",
1299             c->clust->nr, c->clust->nra, c->maxframe);
1300     if (debug)
1301     {
1302         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1303         for (i = 0; (i < c->clust->nra); i++)
1304         {
1305             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1306             {
1307                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1308                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1309             }
1310         }
1311     }
1312     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1313
1314     return c;
1315 }