src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gromacs/topology/index.h"
  38
  39 #ifdef HAVE_CONFIG_H
  40 #include <config.h>
  41 #endif
  42
  43 #include <assert.h>
  44 #include <ctype.h>
  45 #include <stdlib.h>
  46 #include <string.h>
  47
  48 #include <algorithm>
  49
  50 #include "gromacs/legacyheaders/macros.h"
  51 #include "gromacs/legacyheaders/txtdump.h"
  52
  53 #include "gromacs/fileio/gmxfio.h"
  54 #include "gromacs/fileio/strdb.h"
  55 #include "gromacs/topology/atoms.h"
  56 #include "gromacs/topology/block.h"
  57 #include "gromacs/topology/invblock.h"
  58 #include "gromacs/utility/common.h"
  59 #include "gromacs/utility/cstringutil.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/futil.h"
  62 #include "gromacs/utility/smalloc.h"
  63
  64 const char gmx_residuetype_undefined[] = "Other";
  65
  66 struct gmx_residuetype
  67 {
  68     int      n;
  69     char **  resname;
  70     char **  restype;
  71
  72 };
  73
  74
  75 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  76 {
  77     char c;
  78
  79     if (bASK)
  80     {
  81         do
  82         {
  83             c = toupper(fgetc(stdin));
  84         }
  85         while ((c != 'Y') && (c != 'N'));
  86
  87         return (c == 'Y');
  88     }
  89     else
  90     {
  91         return FALSE;
  92     }
  93 }
  94
  95 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  96 {
  97     FILE *out;
  98     int   i, j, k;
  99
 100     out = gmx_fio_fopen(outf, "w");
 101     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 102     for (i = 0; (i < b->nr); i++)
 103     {
 104         fprintf(out, "[ %s ]\n", gnames[i]);
 105         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 106         {
 107             fprintf(out, "%4d ", b->a[j]+1);
 108             if ((k % 15) == 14)
 109             {
 110                 fprintf(out, "\n");
 111             }
 112         }
 113         fprintf(out, "\n");
 114     }
 115
 116     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 117     if (bDuplicate)
 118     {
 119         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 120         for (i = 0; (i < b->nr); i++)
 121         {
 122             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 123             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 124             {
 125                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 126                 if ((k % 15) == 14)
 127                 {
 128                     fprintf(out, "\n");
 129                 }
 130             }
 131             fprintf(out, "\n");
 132         }
 133     }
 134
 135     gmx_fio_fclose(out);
 136 }
 137
 138 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 139 {
 140     int i;
 141
 142     srenew(b->index, b->nr+2);
 143     srenew(*gnames, b->nr+1);
 144     (*gnames)[b->nr] = strdup(name);
 145
 146     srenew(b->a, b->nra+nra);
 147     for (i = 0; (i < nra); i++)
 148     {
 149         b->a[b->nra++] = a[i];
 150     }
 151     b->nr++;
 152     b->index[b->nr] = b->nra;
 153 }
 154
 155 /* compare index in `a' with group in `b' at `index',
 156    when `index'<0 it is relative to end of `b' */
 157 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 158 {
 159     int i;
 160
 161     if (index < 0)
 162     {
 163         index = b->nr-1+index;
 164     }
 165     if (index >= b->nr)
 166     {
 167         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 168     }
 169     /* compare sizes */
 170     if (nra != b->index[index+1] - b->index[index])
 171     {
 172         return FALSE;
 173     }
 174     for (i = 0; i < nra; i++)
 175     {
 176         if (a[i] != b->a[b->index[index]+i])
 177         {
 178             return FALSE;
 179         }
 180     }
 181     return TRUE;
 182 }
 183
 184 static void
 185 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 186 {
 187     int   i, j;
 188     int * counter;
 189
 190     snew(counter, ntypes);
 191     for (i = 0; i < ntypes; i++)
 192     {
 193         counter[i] = 0;
 194     }
 195     for (i = 0; i < nres; i++)
 196     {
 197         for (j = 0; j < ntypes; j++)
 198         {
 199             if (!gmx_strcasecmp(restype[i], typenames[j]))
 200             {
 201                 counter[j]++;
 202             }
 203         }
 204     }
 205
 206     for (i = 0; (i < ntypes); i++)
 207     {
 208         if (counter[i] > 0)
 209         {
 210             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 211         }
 212     }
 213
 214     sfree(counter);
 215 }
 216
 217
 218 static atom_id *
 219 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 220 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 221 {
 222     atom_id *a;
 223     int      i;
 224     int      res;
 225
 226     snew(a, atoms->nr);
 227     *nra = 0;
 228     for (i = 0; (i < atoms->nr); i++)
 229     {
 230         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 231         if (bMatch == FALSE)
 232         {
 233             res = !res;
 234         }
 235         if (res)
 236         {
 237             a[(*nra)++] = i;
 238         }
 239     }
 240
 241     return a;
 242 }
 243
 244 typedef struct {
 245     char    *rname;
 246     gmx_bool bNeg;
 247     char    *gname;
 248 } restp_t;
 249
 250 static void analyse_other(const char ** restype, t_atoms *atoms,
 251                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 252 {
 253     restp_t *restp = NULL;
 254     char   **attp  = NULL;
 255     char    *rname, *aname;
 256     atom_id *other_ndx, *aid, *aaid;
 257     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 258
 259     for (i = 0; (i < atoms->nres); i++)
 260     {
 261         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 262         {
 263             break;
 264         }
 265     }
 266     if (i < atoms->nres)
 267     {
 268         /* we have others */
 269         if (bVerb)
 270         {
 271             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 272         }
 273         snew(other_ndx, atoms->nr);
 274         for (k = 0; (k < atoms->nr); k++)
 275         {
 276             resind = atoms->atom[k].resind;
 277             rname  = *atoms->resinfo[resind].name;
 278             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 279                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 280             {
 281
 282                 for (l = 0; (l < nrestp); l++)
 283                 {
 284                     if (strcmp(restp[l].rname, rname) == 0)
 285                     {
 286                         break;
 287                     }
 288                 }
 289                 if (l == nrestp)
 290                 {
 291                     srenew(restp, nrestp+1);
 292                     restp[nrestp].rname = strdup(rname);
 293                     restp[nrestp].bNeg  = FALSE;
 294                     restp[nrestp].gname = strdup(rname);
 295                     nrestp++;
 296
 297                 }
 298             }
 299         }
 300         for (i = 0; (i < nrestp); i++)
 301         {
 302             snew(aid, atoms->nr);
 303             naid = 0;
 304             for (j = 0; (j < atoms->nr); j++)
 305             {
 306                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 307                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 308                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 309                 {
 310                     aid[naid++] = j;
 311                 }
 312             }
 313             add_grp(gb, gn, naid, aid, restp[i].gname);
 314             if (bASK)
 315             {
 316                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 317                 fflush(stdout);
 318                 if (gmx_ask_yesno(bASK))
 319                 {
 320                     natp = 0;
 321                     for (k = 0; (k < naid); k++)
 322                     {
 323                         aname = *atoms->atomname[aid[k]];
 324                         for (l = 0; (l < natp); l++)
 325                         {
 326                             if (strcmp(aname, attp[l]) == 0)
 327                             {
 328                                 break;
 329                             }
 330                         }
 331                         if (l == natp)
 332                         {
 333                             srenew(attp, ++natp);
 334                             attp[natp-1] = aname;
 335                         }
 336                     }
 337                     if (natp > 1)
 338                     {
 339                         for (l = 0; (l < natp); l++)
 340                         {
 341                             snew(aaid, naid);
 342                             naaid = 0;
 343                             for (k = 0; (k < naid); k++)
 344                             {
 345                                 aname = *atoms->atomname[aid[k]];
 346                                 if (strcmp(aname, attp[l]) == 0)
 347                                 {
 348                                     aaid[naaid++] = aid[k];
 349                                 }
 350                             }
 351                             add_grp(gb, gn, naaid, aaid, attp[l]);
 352                             sfree(aaid);
 353                         }
 354                     }
 355                     sfree(attp);
 356                     attp = NULL;
 357                 }
 358                 sfree(aid);
 359             }
 360         }
 361         sfree(other_ndx);
 362     }
 363 }
 364
 365 /*! \brief
 366  * Cata necessary to construct a single (protein) index group in
 367  * analyse_prot().
 368  */
 369 typedef struct gmx_help_make_index_group
 370 {
 371     /** The set of atom names that will be used to form this index group */
 372     const char **defining_atomnames;
 373     /** Size of the defining_atomnames array */
 374     int          num_defining_atomnames;
 375     /** Name of this index group */
 376     const char  *group_name;
 377     /** Whether the above atom names name the atoms in the group, or
 378         those not in the group */
 379     gmx_bool bTakeComplement;
 380     /** The index in wholename gives the first item in the arrays of
 381        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 382        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 383        This is comparable to using a '*' wildcard at the end of specific
 384        atom names, but that is more involved to implement...
 385      */
 386     int wholename;
 387     /** Only create this index group if it differs from the one specified in compareto,
 388        where -1 means to always create this group. */
 389     int compareto;
 390 } t_gmx_help_make_index_group;
 391
 392 static void analyse_prot(const char ** restype, t_atoms *atoms,
 393                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 394 {
 395     /* lists of atomnames to be used in constructing index groups: */
 396     static const char *pnoh[]    = { "H", "HN" };
 397     static const char *pnodum[]  = {
 398         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 399         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 400     };
 401     static const char *calpha[]  = { "CA" };
 402     static const char *bb[]      = { "N", "CA", "C" };
 403     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 404     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 405     static const char *mch[]     = {
 406         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 407         "H1", "H2", "H3", "H", "HN"
 408     };
 409
 410     static const t_gmx_help_make_index_group constructing_data[] =
 411     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 412      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 413      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 414      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 415      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 416      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 417      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 418      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 419      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 420      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 421     const int   num_index_groups = asize(constructing_data);
 422
 423     int         n, j;
 424     atom_id    *aid;
 425     int         nra, npres;
 426     gmx_bool    match;
 427     char        ndx_name[STRLEN], *atnm;
 428     int         i;
 429
 430     if (bVerb)
 431     {
 432         printf("Analysing Protein...\n");
 433     }
 434     snew(aid, atoms->nr);
 435
 436     /* calculate the number of protein residues */
 437     npres = 0;
 438     for (i = 0; (i < atoms->nres); i++)
 439     {
 440         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 441         {
 442             npres++;
 443         }
 444     }
 445     /* find matching or complement atoms */
 446     for (i = 0; (i < (int)num_index_groups); i++)
 447     {
 448         nra = 0;
 449         for (n = 0; (n < atoms->nr); n++)
 450         {
 451             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 452             {
 453                 match = FALSE;
 454                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 455                 {
 456                     /* skip digits at beginning of atomname, e.g. 1H */
 457                     atnm = *atoms->atomname[n];
 458                     while (isdigit(atnm[0]))
 459                     {
 460                         atnm++;
 461                     }
 462                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 463                     {
 464                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 465                         {
 466                             match = TRUE;
 467                         }
 468                     }
 469                     else
 470                     {
 471                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 472                         {
 473                             match = TRUE;
 474                         }
 475                     }
 476                 }
 477                 if (constructing_data[i].bTakeComplement != match)
 478                 {
 479                     aid[nra++] = n;
 480                 }
 481             }
 482         }
 483         /* if we want to add this group always or it differs from previous
 484            group, add it: */
 485         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 486         {
 487             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 488         }
 489     }
 490
 491     if (bASK)
 492     {
 493         for (i = 0; (i < (int)num_index_groups); i++)
 494         {
 495             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 496             if (gmx_ask_yesno(bASK))
 497             {
 498                 int resind;
 499                 nra = 0;
 500                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 501                 {
 502                     resind = atoms->atom[n].resind;
 503                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 504                     {
 505                         match = FALSE;
 506                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 507                         {
 508                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 509                             {
 510                                 match = TRUE;
 511                             }
 512                         }
 513                         if (constructing_data[i].bTakeComplement != match)
 514                         {
 515                             aid[nra++] = n;
 516                         }
 517                     }
 518                     /* copy the residuename to the tail of the groupname */
 519                     if (nra > 0)
 520                     {
 521                         t_resinfo *ri;
 522                         ri = &atoms->resinfo[resind];
 523                         sprintf(ndx_name, "%s_%s%d%c",
 524                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 525                         add_grp(gb, gn, nra, aid, ndx_name);
 526                         nra = 0;
 527                     }
 528                 }
 529             }
 530         }
 531         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 532         if (gmx_ask_yesno(bASK))
 533         {
 534             /* Make swap sidechain C=O index */
 535             int resind, hold;
 536             nra = 0;
 537             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 538             {
 539                 resind = atoms->atom[n].resind;
 540                 hold   = -1;
 541                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 542                 {
 543                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 544                     {
 545                         aid[nra++] = n;
 546                         hold       = nra;
 547                         nra       += 2;
 548                     }
 549                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 550                     {
 551                         if (hold == -1)
 552                         {
 553                             gmx_incons("Atom naming problem");
 554                         }
 555                         aid[hold] = n;
 556                     }
 557                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 558                     {
 559                         if (hold == -1)
 560                         {
 561                             gmx_incons("Atom naming problem");
 562                         }
 563                         aid[hold+1] = n;
 564                     }
 565                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 566                     {
 567                         if (hold == -1)
 568                         {
 569                             gmx_incons("Atom naming problem");
 570                         }
 571                         aid[hold+1] = n;
 572                     }
 573                     else
 574                     {
 575                         aid[nra++] = n;
 576                     }
 577                 }
 578             }
 579             /* copy the residuename to the tail of the groupname */
 580             if (nra > 0)
 581             {
 582                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 583             }
 584         }
 585     }
 586     sfree(aid);
 587 }
 588
 589
 590
 591
 592 /* Return 0 if the name was found, otherwise -1.
 593  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 594  */
 595 int
 596 gmx_residuetype_get_type(gmx_residuetype_t rt, const char * resname, const char ** p_restype)
 597 {
 598     int    i, rc;
 599
 600     rc = -1;
 601     for (i = 0; i < rt->n && rc; i++)
 602     {
 603         rc = gmx_strcasecmp(rt->resname[i], resname);
 604     }
 605
 606     *p_restype = (rc == 0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 607
 608     return rc;
 609 }
 610
 611 int
 612 gmx_residuetype_add(gmx_residuetype_t rt, const char *newresname, const char *newrestype)
 613 {
 614     int           found;
 615     const char *  p_oldtype;
 616
 617     found = !gmx_residuetype_get_type(rt, newresname, &p_oldtype);
 618
 619     if (found && gmx_strcasecmp(p_oldtype, newrestype))
 620     {
 621         fprintf(stderr, "Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 622                 newresname, p_oldtype, newrestype);
 623     }
 624
 625     if (found == 0)
 626     {
 627         srenew(rt->resname, rt->n+1);
 628         srenew(rt->restype, rt->n+1);
 629         rt->resname[rt->n] = strdup(newresname);
 630         rt->restype[rt->n] = strdup(newrestype);
 631         rt->n++;
 632     }
 633
 634     return 0;
 635 }
 636
 637
 638 int
 639 gmx_residuetype_init(gmx_residuetype_t *prt)
 640 {
 641     FILE                 *  db;
 642     char                    line[STRLEN];
 643     char                    resname[STRLEN], restype[STRLEN], dum[STRLEN];
 644     struct gmx_residuetype *rt;
 645
 646     snew(rt, 1);
 647     *prt = rt;
 648
 649     rt->n        = 0;
 650     rt->resname  = NULL;
 651     rt->restype  = NULL;
 652
 653     db = libopen("residuetypes.dat");
 654
 655     while (get_a_line(db, line, STRLEN))
 656     {
 657         strip_comment(line);
 658         trim(line);
 659         if (line[0] != '\0')
 660         {
 661             if (sscanf(line, "%s %s %s", resname, restype, dum) != 2)
 662             {
 663                 gmx_fatal(FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat");
 664             }
 665             gmx_residuetype_add(rt, resname, restype);
 666         }
 667     }
 668
 669     fclose(db);
 670
 671     return 0;
 672 }
 673
 674
 675
 676 int
 677 gmx_residuetype_destroy(gmx_residuetype_t rt)
 678 {
 679     int i;
 680
 681     for (i = 0; i < rt->n; i++)
 682     {
 683         sfree(rt->resname[i]);
 684         sfree(rt->restype[i]);
 685     }
 686     sfree(rt->resname);
 687     sfree(rt->restype);
 688     sfree(rt);
 689
 690     return 0;
 691 }
 692
 693 int
 694 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 695                              const char ***       p_typenames,
 696                              int *                ntypes)
 697 {
 698     int            i, n;
 699     const char **  my_typename;
 700
 701     n           = 0;
 702     my_typename = NULL;
 703     for (i = 0; i < rt->n; i++)
 704     {
 705         const char *const p      = rt->restype[i];
 706         bool              bFound = false;
 707         for (int j = 0; j < n && !bFound; j++)
 708         {
 709             assert(my_typename != NULL);
 710             bFound = !gmx_strcasecmp(p, my_typename[j]);
 711         }
 712         if (!bFound)
 713         {
 714             srenew(my_typename, n+1);
 715             my_typename[n] = p;
 716             n++;
 717         }
 718     }
 719     *ntypes      = n;
 720     *p_typenames = my_typename;
 721
 722     return 0;
 723 }
 724
 725
 726
 727 gmx_bool
 728 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 729 {
 730     gmx_bool    rc;
 731     const char *p_type;
 732
 733     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 734         gmx_strcasecmp(p_type, "Protein") == 0)
 735     {
 736         rc = TRUE;
 737     }
 738     else
 739     {
 740         rc = FALSE;
 741     }
 742     return rc;
 743 }
 744
 745 gmx_bool
 746 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 747 {
 748     gmx_bool    rc;
 749     const char *p_type;
 750
 751     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 752         gmx_strcasecmp(p_type, "DNA") == 0)
 753     {
 754         rc = TRUE;
 755     }
 756     else
 757     {
 758         rc = FALSE;
 759     }
 760     return rc;
 761 }
 762
 763 gmx_bool
 764 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 765 {
 766     gmx_bool    rc;
 767     const char *p_type;
 768
 769     if (gmx_residuetype_get_type(rt, resnm, &p_type) == 0 &&
 770         gmx_strcasecmp(p_type, "RNA") == 0)
 771     {
 772         rc = TRUE;
 773     }
 774     else
 775     {
 776         rc = FALSE;
 777     }
 778     return rc;
 779 }
 780
 781 /* Return the size of the arrays */
 782 int
 783 gmx_residuetype_get_size(gmx_residuetype_t rt)
 784 {
 785     return rt->n;
 786 }
 787
 788 /* Search for a residuetype with name resnm within the
 789  * gmx_residuetype database. Return the index if found,
 790  * otherwise -1.
 791  */
 792 int
 793 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 794 {
 795     int i, rc;
 796
 797     rc = -1;
 798     for (i = 0; i < rt->n && rc; i++)
 799     {
 800         rc = gmx_strcasecmp(rt->resname[i], resnm);
 801     }
 802
 803     return (0 == rc) ? i-1 : -1;
 804 }
 805
 806 /* Return the name of the residuetype with the given index, or
 807  * NULL if not found. */
 808 const char *
 809 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 810 {
 811     if (index >= 0 && index < rt->n)
 812     {
 813         return rt->resname[index];
 814     }
 815     else
 816     {
 817         return NULL;
 818     }
 819 }
 820
 821
 822
 823 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 824 {
 825     gmx_residuetype_t rt = NULL;
 826     char             *resnm;
 827     atom_id          *aid;
 828     const char    **  restype;
 829     int               nra;
 830     int               i, k;
 831     int               ntypes;
 832     const char     ** p_typename;
 833     int               iwater, iion;
 834     int               nwater, nion;
 835     int               found;
 836
 837     if (bVerb)
 838     {
 839         printf("Analysing residue names:\n");
 840     }
 841     /* Create system group, every single atom */
 842     snew(aid, atoms->nr);
 843     for (i = 0; i < atoms->nr; i++)
 844     {
 845         aid[i] = i;
 846     }
 847     add_grp(gb, gn, atoms->nr, aid, "System");
 848     sfree(aid);
 849
 850     /* For every residue, get a pointer to the residue type name */
 851     gmx_residuetype_init(&rt);
 852     assert(rt);
 853
 854     snew(restype, atoms->nres);
 855     ntypes     = 0;
 856     p_typename = NULL;
 857     for (i = 0; i < atoms->nres; i++)
 858     {
 859         resnm = *atoms->resinfo[i].name;
 860         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 861
 862         /* Note that this does not lead to a N*N loop, but N*K, where
 863          * K is the number of residue _types_, which is small and independent of N.
 864          */
 865         found = 0;
 866         for (k = 0; k < ntypes && !found; k++)
 867         {
 868             assert(p_typename != NULL);
 869             found = !strcmp(restype[i], p_typename[k]);
 870         }
 871         if (!found)
 872         {
 873             srenew(p_typename, ntypes+1);
 874             p_typename[ntypes++] = strdup(restype[i]);
 875         }
 876     }
 877
 878     if (bVerb)
 879     {
 880         p_status(restype, atoms->nres, p_typename, ntypes);
 881     }
 882
 883     for (k = 0; k < ntypes; k++)
 884     {
 885         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 886
 887         /* Check for special types to do fancy stuff with */
 888
 889         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 890         {
 891             sfree(aid);
 892             /* PROTEIN */
 893             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 894
 895             /* Create a Non-Protein group */
 896             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 897             if ((nra > 0) && (nra < atoms->nr))
 898             {
 899                 add_grp(gb, gn, nra, aid, "non-Protein");
 900             }
 901             sfree(aid);
 902         }
 903         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 904         {
 905             add_grp(gb, gn, nra, aid, p_typename[k]);
 906             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 907             add_grp(gb, gn, nra, aid, "SOL");
 908
 909             sfree(aid);
 910
 911             /* Solvent, create a negated group too */
 912             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 913             if ((nra > 0) && (nra < atoms->nr))
 914             {
 915                 add_grp(gb, gn, nra, aid, "non-Water");
 916             }
 917             sfree(aid);
 918         }
 919         else if (nra > 0)
 920         {
 921             /* Other groups */
 922             add_grp(gb, gn, nra, aid, p_typename[k]);
 923             sfree(aid);
 924             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 925         }
 926     }
 927
 928     sfree(p_typename);
 929     sfree(restype);
 930     gmx_residuetype_destroy(rt);
 931
 932     /* Create a merged water_and_ions group */
 933     iwater = -1;
 934     iion   = -1;
 935     nwater = 0;
 936     nion   = 0;
 937
 938     for (i = 0; i < gb->nr; i++)
 939     {
 940         if (!gmx_strcasecmp((*gn)[i], "Water"))
 941         {
 942             iwater = i;
 943             nwater = gb->index[i+1]-gb->index[i];
 944         }
 945         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 946         {
 947             iion = i;
 948             nion = gb->index[i+1]-gb->index[i];
 949         }
 950     }
 951
 952     if (nwater > 0 && nion > 0)
 953     {
 954         srenew(gb->index, gb->nr+2);
 955         srenew(*gn, gb->nr+1);
 956         (*gn)[gb->nr] = strdup("Water_and_ions");
 957         srenew(gb->a, gb->nra+nwater+nion);
 958         if (nwater > 0)
 959         {
 960             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 961             {
 962                 gb->a[gb->nra++] = gb->a[i];
 963             }
 964         }
 965         if (nion > 0)
 966         {
 967             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 968             {
 969                 gb->a[gb->nra++] = gb->a[i];
 970             }
 971         }
 972         gb->nr++;
 973         gb->index[gb->nr] = gb->nra;
 974     }
 975 }
 976
 977
 978 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 979 {
 980     int i;
 981
 982     for (i = 0; i < n; i++)
 983     {
 984         if (index[i] >= natoms)
 985         {
 986             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 987                       gname ? gname : "Index", i+1, index[i]+1,
 988                       traj ? traj : "the trajectory", natoms);
 989         }
 990         else if (index[i] < 0)
 991         {
 992             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 993                       gname ? gname : "Index", i+1, index[i]+1);
 994         }
 995     }
 996 }
 997
 998 t_blocka *init_index(const char *gfile, char ***grpname)
 999 {
1000     FILE      *in;
1001     t_blocka  *b;
1002     int        a, maxentries;
1003     int        i, j, ng;
1004     char       line[STRLEN], *pt, str[STRLEN];
1005
1006     in = gmx_fio_fopen(gfile, "r");
1007     snew(b, 1);
1008     get_a_line(in, line, STRLEN);
1009     if (line[0] == '[')
1010     {
1011         /* new format */
1012         b->nr      = 0;
1013         b->index   = NULL;
1014         b->nra     = 0;
1015         b->a       = NULL;
1016         *grpname   = NULL;
1017         maxentries = 0;
1018         do
1019         {
1020             if (get_header(line, str))
1021             {
1022                 b->nr++;
1023                 srenew(b->index, b->nr+1);
1024                 srenew(*grpname, b->nr);
1025                 if (b->nr == 1)
1026                 {
1027                     b->index[0] = 0;
1028                 }
1029                 b->index[b->nr]     = b->index[b->nr-1];
1030                 (*grpname)[b->nr-1] = strdup(str);
1031             }
1032             else
1033             {
1034                 if (b->nr == 0)
1035                 {
1036                     gmx_fatal(FARGS, "The first header of your indexfile is invalid");
1037                 }
1038                 pt = line;
1039                 while (sscanf(pt, "%s", str) == 1)
1040                 {
1041                     i = b->index[b->nr];
1042                     if (i >= maxentries)
1043                     {
1044                         maxentries += 1024;
1045                         srenew(b->a, maxentries);
1046                     }
1047                     b->a[i] = strtol(str, NULL, 10)-1;
1048                     b->index[b->nr]++;
1049                     (b->nra)++;
1050                     pt = strstr(pt, str)+strlen(str);
1051                 }
1052             }
1053         }
1054         while (get_a_line(in, line, STRLEN));
1055     }
1056     else
1057     {
1058         /* old format */
1059         sscanf(line, "%d%d", &b->nr, &b->nra);
1060         snew(b->index, b->nr+1);
1061         snew(*grpname, b->nr);
1062         b->index[0] = 0;
1063         snew(b->a, b->nra);
1064         for (i = 0; (i < b->nr); i++)
1065         {
1066             GMX_IGNORE_RETURN_VALUE(fscanf(in, "%s%d", str, &ng));
1067             (*grpname)[i] = strdup(str);
1068             b->index[i+1] = b->index[i]+ng;
1069             if (b->index[i+1] > b->nra)
1070             {
1071                 gmx_fatal(FARGS, "Something wrong in your indexfile at group %s", str);
1072             }
1073             for (j = 0; (j < ng); j++)
1074             {
1075                 GMX_IGNORE_RETURN_VALUE(fscanf(in, "%d", &a));
1076                 b->a[b->index[i]+j] = a;
1077             }
1078         }
1079     }
1080     gmx_fio_fclose(in);
1081
1082     for (i = 0; (i < b->nr); i++)
1083     {
1084         for (j = b->index[i]; (j < b->index[i+1]); j++)
1085         {
1086             if (b->a[j] < 0)
1087             {
1088                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
1089                         b->a[j], (*grpname)[i]);
1090             }
1091         }
1092     }
1093
1094     return b;
1095 }
1096
1097 static void minstring(char *str)
1098 {
1099     int i;
1100
1101     for (i = 0; (i < (int)strlen(str)); i++)
1102     {
1103         if (str[i] == '-')
1104         {
1105             str[i] = '_';
1106         }
1107     }
1108 }
1109
1110 int find_group(char s[], int ngrps, char **grpname)
1111 {
1112     int      aa, i, n;
1113     char     string[STRLEN];
1114     gmx_bool bMultiple;
1115
1116     bMultiple = FALSE;
1117     n         = strlen(s);
1118     aa        = NOTSET;
1119     /* first look for whole name match */
1120     if (aa == NOTSET)
1121     {
1122         for (i = 0; i < ngrps; i++)
1123         {
1124             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
1125             {
1126                 if (aa != NOTSET)
1127                 {
1128                     bMultiple = TRUE;
1129                 }
1130                 aa = i;
1131             }
1132         }
1133     }
1134     /* second look for first string match */
1135     if (aa == NOTSET)
1136     {
1137         for (i = 0; i < ngrps; i++)
1138         {
1139             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
1140             {
1141                 if (aa != NOTSET)
1142                 {
1143                     bMultiple = TRUE;
1144                 }
1145                 aa = i;
1146             }
1147         }
1148     }
1149     /* last look for arbitrary substring match */
1150     if (aa == NOTSET)
1151     {
1152         upstring(s);
1153         minstring(s);
1154         for (i = 0; i < ngrps; i++)
1155         {
1156             strcpy(string, grpname[i]);
1157             upstring(string);
1158             minstring(string);
1159             if (strstr(string, s) != NULL)
1160             {
1161                 if (aa != NOTSET)
1162                 {
1163                     bMultiple = TRUE;
1164                 }
1165                 aa = i;
1166             }
1167         }
1168     }
1169     if (bMultiple)
1170     {
1171         printf("Error: Multiple groups '%s' selected\n", s);
1172         aa = NOTSET;
1173     }
1174     return aa;
1175 }
1176
1177 static int qgroup(int *a, int ngrps, char **grpname)
1178 {
1179     char     s[STRLEN];
1180     int      aa;
1181     gmx_bool bInRange;
1182     char    *end;
1183
1184     do
1185     {
1186         fprintf(stderr, "Select a group: ");
1187         do
1188         {
1189             if (scanf("%s", s) != 1)
1190             {
1191                 gmx_fatal(FARGS, "Cannot read from input");
1192             }
1193             trim(s); /* remove spaces */
1194         }
1195         while (strlen(s) == 0);
1196         aa = strtol(s, &end, 10);
1197         if (aa == 0 && end[0] != '\0') /* string entered */
1198         {
1199             aa = find_group(s, ngrps, grpname);
1200         }
1201         bInRange = (aa >= 0 && aa < ngrps);
1202         if (!bInRange)
1203         {
1204             printf("Error: No such group '%s'\n", s);
1205         }
1206     }
1207     while (!bInRange);
1208     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1209     *a = aa;
1210     return aa;
1211 }
1212
1213 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
1214                       int ngrps, int isize[], atom_id *index[], int grpnr[])
1215 {
1216     int i, j, gnr1;
1217
1218     if (grps->nr == 0)
1219     {
1220         gmx_fatal(FARGS, "Error: no groups in indexfile");
1221     }
1222     for (i = 0; (i < grps->nr); i++)
1223     {
1224         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
1225                 grps->index[i+1]-grps->index[i]);
1226     }
1227     for (i = 0; (i < ngrps); i++)
1228     {
1229         if (grps->nr > 1)
1230         {
1231             do
1232             {
1233                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
1234                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
1235                 {
1236                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
1237                 }
1238             }
1239             while ((gnr1 < 0) || (gnr1 >= grps->nr));
1240         }
1241         else
1242         {
1243             fprintf(stderr, "There is one group in the index\n");
1244             gnr1 = 0;
1245         }
1246         gnames[i] = strdup(grpname[gnr1]);
1247         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
1248         snew(index[i], isize[i]);
1249         for (j = 0; (j < isize[i]); j++)
1250         {
1251             index[i][j] = grps->a[grps->index[gnr1]+j];
1252         }
1253     }
1254 }
1255
1256 void rd_index(const char *statfile, int ngrps, int isize[],
1257               atom_id *index[], char *grpnames[])
1258 {
1259     char    **gnames;
1260     t_blocka *grps;
1261     int      *grpnr;
1262
1263     snew(grpnr, ngrps);
1264     if (!statfile)
1265     {
1266         gmx_fatal(FARGS, "No index file specified");
1267     }
1268     grps = init_index(statfile, &gnames);
1269     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1270 }
1271
1272 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1273                   atom_id *index[], char *grpnames[], int grpnr[])
1274 {
1275     char    **gnames;
1276     t_blocka *grps;
1277
1278     if (!statfile)
1279     {
1280         gmx_fatal(FARGS, "No index file specified");
1281     }
1282     grps = init_index(statfile, &gnames);
1283
1284     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1285 }
1286
1287 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1288                int isize[], atom_id *index[], char *grpnames[])
1289 {
1290     char    ***gnames;
1291     t_blocka  *grps = NULL;
1292     int       *grpnr;
1293
1294     snew(grpnr, ngrps);
1295     snew(gnames, 1);
1296     if (fnm != NULL)
1297     {
1298         grps = init_index(fnm, gnames);
1299     }
1300     else if (atoms)
1301     {
1302         snew(grps, 1);
1303         snew(grps->index, 1);
1304         analyse(atoms, grps, gnames, FALSE, FALSE);
1305     }
1306     else
1307     {
1308         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1309     }
1310
1311     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1312 }
1313
1314 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1315 {
1316     t_cluster_ndx *c;
1317     int            i;
1318
1319     snew(c, 1);
1320     c->clust     = init_index(ndx, &c->grpname);
1321     c->maxframe  = -1;
1322     for (i = 0; (i < c->clust->nra); i++)
1323     {
1324         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1325     }
1326     fprintf(fplog ? fplog : stdout,
1327             "There are %d clusters containing %d structures, highest framenr is %d\n",
1328             c->clust->nr, c->clust->nra, c->maxframe);
1329     if (debug)
1330     {
1331         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1332         for (i = 0; (i < c->clust->nra); i++)
1333         {
1334             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1335             {
1336                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1337                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1338             }
1339         }
1340     }
1341     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1342
1343     return c;
1344 }