src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "index.h"
  40
  41 #include <assert.h>
  42 #include <ctype.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/fileio/gmxfio.h"
  49 #include "gromacs/fileio/strdb.h"
  50 #include "gromacs/legacyheaders/macros.h"
  51 #include "gromacs/legacyheaders/txtdump.h"
  52 #include "gromacs/topology/atoms.h"
  53 #include "gromacs/topology/block.h"
  54 #include "gromacs/topology/invblock.h"
  55 #include "gromacs/topology/residuetypes.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/smalloc.h"
  59
  60 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  61 {
  62     char c;
  63
  64     if (bASK)
  65     {
  66         do
  67         {
  68             c = toupper(fgetc(stdin));
  69         }
  70         while ((c != 'Y') && (c != 'N'));
  71
  72         return (c == 'Y');
  73     }
  74     else
  75     {
  76         return FALSE;
  77     }
  78 }
  79
  80 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  81 {
  82     FILE *out;
  83     int   i, j, k;
  84
  85     out = gmx_fio_fopen(outf, "w");
  86     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  87     for (i = 0; (i < b->nr); i++)
  88     {
  89         fprintf(out, "[ %s ]\n", gnames[i]);
  90         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
  91         {
  92             fprintf(out, "%4d ", b->a[j]+1);
  93             if ((k % 15) == 14)
  94             {
  95                 fprintf(out, "\n");
  96             }
  97         }
  98         fprintf(out, "\n");
  99     }
 100
 101     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 102     if (bDuplicate)
 103     {
 104         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 105         for (i = 0; (i < b->nr); i++)
 106         {
 107             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 108             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 109             {
 110                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 111                 if ((k % 15) == 14)
 112                 {
 113                     fprintf(out, "\n");
 114                 }
 115             }
 116             fprintf(out, "\n");
 117         }
 118     }
 119
 120     gmx_fio_fclose(out);
 121 }
 122
 123 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 124 {
 125     int i;
 126
 127     srenew(b->index, b->nr+2);
 128     srenew(*gnames, b->nr+1);
 129     (*gnames)[b->nr] = gmx_strdup(name);
 130
 131     srenew(b->a, b->nra+nra);
 132     for (i = 0; (i < nra); i++)
 133     {
 134         b->a[b->nra++] = a[i];
 135     }
 136     b->nr++;
 137     b->index[b->nr] = b->nra;
 138 }
 139
 140 /* compare index in `a' with group in `b' at `index',
 141    when `index'<0 it is relative to end of `b' */
 142 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 143 {
 144     int i;
 145
 146     if (index < 0)
 147     {
 148         index = b->nr-1+index;
 149     }
 150     if (index >= b->nr)
 151     {
 152         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 153     }
 154     /* compare sizes */
 155     if (nra != b->index[index+1] - b->index[index])
 156     {
 157         return FALSE;
 158     }
 159     for (i = 0; i < nra; i++)
 160     {
 161         if (a[i] != b->a[b->index[index]+i])
 162         {
 163             return FALSE;
 164         }
 165     }
 166     return TRUE;
 167 }
 168
 169 static void
 170 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 171 {
 172     int   i, j;
 173     int * counter;
 174
 175     snew(counter, ntypes);
 176     for (i = 0; i < ntypes; i++)
 177     {
 178         counter[i] = 0;
 179     }
 180     for (i = 0; i < nres; i++)
 181     {
 182         for (j = 0; j < ntypes; j++)
 183         {
 184             if (!gmx_strcasecmp(restype[i], typenames[j]))
 185             {
 186                 counter[j]++;
 187             }
 188         }
 189     }
 190
 191     for (i = 0; (i < ntypes); i++)
 192     {
 193         if (counter[i] > 0)
 194         {
 195             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 196         }
 197     }
 198
 199     sfree(counter);
 200 }
 201
 202
 203 static atom_id *
 204 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 205 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 206 {
 207     atom_id *a;
 208     int      i;
 209     int      res;
 210
 211     snew(a, atoms->nr);
 212     *nra = 0;
 213     for (i = 0; (i < atoms->nr); i++)
 214     {
 215         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 216         if (bMatch == FALSE)
 217         {
 218             res = !res;
 219         }
 220         if (res)
 221         {
 222             a[(*nra)++] = i;
 223         }
 224     }
 225
 226     return a;
 227 }
 228
 229 typedef struct {
 230     char    *rname;
 231     gmx_bool bNeg;
 232     char    *gname;
 233 } restp_t;
 234
 235 static void analyse_other(const char ** restype, t_atoms *atoms,
 236                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 237 {
 238     restp_t *restp = NULL;
 239     char   **attp  = NULL;
 240     char    *rname, *aname;
 241     atom_id *other_ndx, *aid, *aaid;
 242     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 243
 244     for (i = 0; (i < atoms->nres); i++)
 245     {
 246         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 247         {
 248             break;
 249         }
 250     }
 251     if (i < atoms->nres)
 252     {
 253         /* we have others */
 254         if (bVerb)
 255         {
 256             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 257         }
 258         snew(other_ndx, atoms->nr);
 259         for (k = 0; (k < atoms->nr); k++)
 260         {
 261             resind = atoms->atom[k].resind;
 262             rname  = *atoms->resinfo[resind].name;
 263             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 264                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 265             {
 266
 267                 for (l = 0; (l < nrestp); l++)
 268                 {
 269                     if (strcmp(restp[l].rname, rname) == 0)
 270                     {
 271                         break;
 272                     }
 273                 }
 274                 if (l == nrestp)
 275                 {
 276                     srenew(restp, nrestp+1);
 277                     restp[nrestp].rname = gmx_strdup(rname);
 278                     restp[nrestp].bNeg  = FALSE;
 279                     restp[nrestp].gname = gmx_strdup(rname);
 280                     nrestp++;
 281
 282                 }
 283             }
 284         }
 285         for (i = 0; (i < nrestp); i++)
 286         {
 287             snew(aid, atoms->nr);
 288             naid = 0;
 289             for (j = 0; (j < atoms->nr); j++)
 290             {
 291                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 292                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 293                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 294                 {
 295                     aid[naid++] = j;
 296                 }
 297             }
 298             add_grp(gb, gn, naid, aid, restp[i].gname);
 299             if (bASK)
 300             {
 301                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 302                 fflush(stdout);
 303                 if (gmx_ask_yesno(bASK))
 304                 {
 305                     natp = 0;
 306                     for (k = 0; (k < naid); k++)
 307                     {
 308                         aname = *atoms->atomname[aid[k]];
 309                         for (l = 0; (l < natp); l++)
 310                         {
 311                             if (strcmp(aname, attp[l]) == 0)
 312                             {
 313                                 break;
 314                             }
 315                         }
 316                         if (l == natp)
 317                         {
 318                             srenew(attp, ++natp);
 319                             attp[natp-1] = aname;
 320                         }
 321                     }
 322                     if (natp > 1)
 323                     {
 324                         for (l = 0; (l < natp); l++)
 325                         {
 326                             snew(aaid, naid);
 327                             naaid = 0;
 328                             for (k = 0; (k < naid); k++)
 329                             {
 330                                 aname = *atoms->atomname[aid[k]];
 331                                 if (strcmp(aname, attp[l]) == 0)
 332                                 {
 333                                     aaid[naaid++] = aid[k];
 334                                 }
 335                             }
 336                             add_grp(gb, gn, naaid, aaid, attp[l]);
 337                             sfree(aaid);
 338                         }
 339                     }
 340                     sfree(attp);
 341                     attp = NULL;
 342                 }
 343                 sfree(aid);
 344             }
 345         }
 346         sfree(other_ndx);
 347     }
 348 }
 349
 350 /*! \brief
 351  * Cata necessary to construct a single (protein) index group in
 352  * analyse_prot().
 353  */
 354 typedef struct gmx_help_make_index_group
 355 {
 356     /** The set of atom names that will be used to form this index group */
 357     const char **defining_atomnames;
 358     /** Size of the defining_atomnames array */
 359     int          num_defining_atomnames;
 360     /** Name of this index group */
 361     const char  *group_name;
 362     /** Whether the above atom names name the atoms in the group, or
 363         those not in the group */
 364     gmx_bool bTakeComplement;
 365     /** The index in wholename gives the first item in the arrays of
 366        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 367        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 368        This is comparable to using a '*' wildcard at the end of specific
 369        atom names, but that is more involved to implement...
 370      */
 371     int wholename;
 372     /** Only create this index group if it differs from the one specified in compareto,
 373        where -1 means to always create this group. */
 374     int compareto;
 375 } t_gmx_help_make_index_group;
 376
 377 static void analyse_prot(const char ** restype, t_atoms *atoms,
 378                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 379 {
 380     /* lists of atomnames to be used in constructing index groups: */
 381     static const char *pnoh[]    = { "H", "HN" };
 382     static const char *pnodum[]  = {
 383         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 384         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 385     };
 386     static const char *calpha[]  = { "CA" };
 387     static const char *bb[]      = { "N", "CA", "C" };
 388     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 389     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 390     static const char *mch[]     = {
 391         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 392         "H1", "H2", "H3", "H", "HN"
 393     };
 394
 395     static const t_gmx_help_make_index_group constructing_data[] =
 396     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 397      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 398      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 399      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 400      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 401      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 402      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 403      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 404      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 405      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 406     const int   num_index_groups = asize(constructing_data);
 407
 408     int         n, j;
 409     atom_id    *aid;
 410     int         nra, npres;
 411     gmx_bool    match;
 412     char        ndx_name[STRLEN], *atnm;
 413     int         i;
 414
 415     if (bVerb)
 416     {
 417         printf("Analysing Protein...\n");
 418     }
 419     snew(aid, atoms->nr);
 420
 421     /* calculate the number of protein residues */
 422     npres = 0;
 423     for (i = 0; (i < atoms->nres); i++)
 424     {
 425         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 426         {
 427             npres++;
 428         }
 429     }
 430     /* find matching or complement atoms */
 431     for (i = 0; (i < (int)num_index_groups); i++)
 432     {
 433         nra = 0;
 434         for (n = 0; (n < atoms->nr); n++)
 435         {
 436             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 437             {
 438                 match = FALSE;
 439                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 440                 {
 441                     /* skip digits at beginning of atomname, e.g. 1H */
 442                     atnm = *atoms->atomname[n];
 443                     while (isdigit(atnm[0]))
 444                     {
 445                         atnm++;
 446                     }
 447                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 448                     {
 449                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 450                         {
 451                             match = TRUE;
 452                         }
 453                     }
 454                     else
 455                     {
 456                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 457                         {
 458                             match = TRUE;
 459                         }
 460                     }
 461                 }
 462                 if (constructing_data[i].bTakeComplement != match)
 463                 {
 464                     aid[nra++] = n;
 465                 }
 466             }
 467         }
 468         /* if we want to add this group always or it differs from previous
 469            group, add it: */
 470         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 471         {
 472             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 473         }
 474     }
 475
 476     if (bASK)
 477     {
 478         for (i = 0; (i < (int)num_index_groups); i++)
 479         {
 480             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 481             if (gmx_ask_yesno(bASK))
 482             {
 483                 int resind;
 484                 nra = 0;
 485                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 486                 {
 487                     resind = atoms->atom[n].resind;
 488                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 489                     {
 490                         match = FALSE;
 491                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 492                         {
 493                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 494                             {
 495                                 match = TRUE;
 496                             }
 497                         }
 498                         if (constructing_data[i].bTakeComplement != match)
 499                         {
 500                             aid[nra++] = n;
 501                         }
 502                     }
 503                     /* copy the residuename to the tail of the groupname */
 504                     if (nra > 0)
 505                     {
 506                         t_resinfo *ri;
 507                         ri = &atoms->resinfo[resind];
 508                         sprintf(ndx_name, "%s_%s%d%c",
 509                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 510                         add_grp(gb, gn, nra, aid, ndx_name);
 511                         nra = 0;
 512                     }
 513                 }
 514             }
 515         }
 516         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 517         if (gmx_ask_yesno(bASK))
 518         {
 519             /* Make swap sidechain C=O index */
 520             int resind, hold;
 521             nra = 0;
 522             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 523             {
 524                 resind = atoms->atom[n].resind;
 525                 hold   = -1;
 526                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 527                 {
 528                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 529                     {
 530                         aid[nra++] = n;
 531                         hold       = nra;
 532                         nra       += 2;
 533                     }
 534                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 535                     {
 536                         if (hold == -1)
 537                         {
 538                             gmx_incons("Atom naming problem");
 539                         }
 540                         aid[hold] = n;
 541                     }
 542                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 543                     {
 544                         if (hold == -1)
 545                         {
 546                             gmx_incons("Atom naming problem");
 547                         }
 548                         aid[hold+1] = n;
 549                     }
 550                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 551                     {
 552                         if (hold == -1)
 553                         {
 554                             gmx_incons("Atom naming problem");
 555                         }
 556                         aid[hold+1] = n;
 557                     }
 558                     else
 559                     {
 560                         aid[nra++] = n;
 561                     }
 562                 }
 563             }
 564             /* copy the residuename to the tail of the groupname */
 565             if (nra > 0)
 566             {
 567                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 568             }
 569         }
 570     }
 571     sfree(aid);
 572 }
 573
 574
 575 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 576 {
 577     gmx_residuetype_t*rt = NULL;
 578     char             *resnm;
 579     atom_id          *aid;
 580     const char    **  restype;
 581     int               nra;
 582     int               i, k;
 583     int               ntypes;
 584     const char     ** p_typename;
 585     int               iwater, iion;
 586     int               nwater, nion;
 587     int               found;
 588
 589     if (bVerb)
 590     {
 591         printf("Analysing residue names:\n");
 592     }
 593     /* Create system group, every single atom */
 594     snew(aid, atoms->nr);
 595     for (i = 0; i < atoms->nr; i++)
 596     {
 597         aid[i] = i;
 598     }
 599     add_grp(gb, gn, atoms->nr, aid, "System");
 600     sfree(aid);
 601
 602     /* For every residue, get a pointer to the residue type name */
 603     gmx_residuetype_init(&rt);
 604     assert(rt);
 605
 606     snew(restype, atoms->nres);
 607     ntypes     = 0;
 608     p_typename = NULL;
 609     for (i = 0; i < atoms->nres; i++)
 610     {
 611         resnm = *atoms->resinfo[i].name;
 612         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 613
 614         /* Note that this does not lead to a N*N loop, but N*K, where
 615          * K is the number of residue _types_, which is small and independent of N.
 616          */
 617         found = 0;
 618         for (k = 0; k < ntypes && !found; k++)
 619         {
 620             assert(p_typename != NULL);
 621             found = !strcmp(restype[i], p_typename[k]);
 622         }
 623         if (!found)
 624         {
 625             srenew(p_typename, ntypes+1);
 626             p_typename[ntypes++] = gmx_strdup(restype[i]);
 627         }
 628     }
 629
 630     if (bVerb)
 631     {
 632         p_status(restype, atoms->nres, p_typename, ntypes);
 633     }
 634
 635     for (k = 0; k < ntypes; k++)
 636     {
 637         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 638
 639         /* Check for special types to do fancy stuff with */
 640
 641         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 642         {
 643             sfree(aid);
 644             /* PROTEIN */
 645             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 646
 647             /* Create a Non-Protein group */
 648             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 649             if ((nra > 0) && (nra < atoms->nr))
 650             {
 651                 add_grp(gb, gn, nra, aid, "non-Protein");
 652             }
 653             sfree(aid);
 654         }
 655         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 656         {
 657             add_grp(gb, gn, nra, aid, p_typename[k]);
 658             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 659             add_grp(gb, gn, nra, aid, "SOL");
 660
 661             sfree(aid);
 662
 663             /* Solvent, create a negated group too */
 664             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 665             if ((nra > 0) && (nra < atoms->nr))
 666             {
 667                 add_grp(gb, gn, nra, aid, "non-Water");
 668             }
 669             sfree(aid);
 670         }
 671         else if (nra > 0)
 672         {
 673             /* Other groups */
 674             add_grp(gb, gn, nra, aid, p_typename[k]);
 675             sfree(aid);
 676             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 677         }
 678     }
 679
 680     sfree(p_typename);
 681     sfree(restype);
 682     gmx_residuetype_destroy(rt);
 683
 684     /* Create a merged water_and_ions group */
 685     iwater = -1;
 686     iion   = -1;
 687     nwater = 0;
 688     nion   = 0;
 689
 690     for (i = 0; i < gb->nr; i++)
 691     {
 692         if (!gmx_strcasecmp((*gn)[i], "Water"))
 693         {
 694             iwater = i;
 695             nwater = gb->index[i+1]-gb->index[i];
 696         }
 697         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 698         {
 699             iion = i;
 700             nion = gb->index[i+1]-gb->index[i];
 701         }
 702     }
 703
 704     if (nwater > 0 && nion > 0)
 705     {
 706         srenew(gb->index, gb->nr+2);
 707         srenew(*gn, gb->nr+1);
 708         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 709         srenew(gb->a, gb->nra+nwater+nion);
 710         if (nwater > 0)
 711         {
 712             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 713             {
 714                 gb->a[gb->nra++] = gb->a[i];
 715             }
 716         }
 717         if (nion > 0)
 718         {
 719             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 720             {
 721                 gb->a[gb->nra++] = gb->a[i];
 722             }
 723         }
 724         gb->nr++;
 725         gb->index[gb->nr] = gb->nra;
 726     }
 727 }
 728
 729
 730 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 731 {
 732     int i;
 733
 734     for (i = 0; i < n; i++)
 735     {
 736         if (index[i] >= natoms)
 737         {
 738             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 739                       gname ? gname : "Index", i+1, index[i]+1,
 740                       traj ? traj : "the trajectory", natoms);
 741         }
 742         else if (index[i] < 0)
 743         {
 744             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 745                       gname ? gname : "Index", i+1, index[i]+1);
 746         }
 747     }
 748 }
 749
 750 t_blocka *init_index(const char *gfile, char ***grpname)
 751 {
 752     FILE      *in;
 753     t_blocka  *b;
 754     int        maxentries;
 755     int        i, j;
 756     char       line[STRLEN], *pt, str[STRLEN];
 757
 758     in = gmx_fio_fopen(gfile, "r");
 759     snew(b, 1);
 760     b->nr      = 0;
 761     b->index   = NULL;
 762     b->nra     = 0;
 763     b->a       = NULL;
 764     *grpname   = NULL;
 765     maxentries = 0;
 766     while (get_a_line(in, line, STRLEN))
 767     {
 768         if (get_header(line, str))
 769         {
 770             b->nr++;
 771             srenew(b->index, b->nr+1);
 772             srenew(*grpname, b->nr);
 773             if (b->nr == 1)
 774             {
 775                 b->index[0] = 0;
 776             }
 777             b->index[b->nr]     = b->index[b->nr-1];
 778             (*grpname)[b->nr-1] = gmx_strdup(str);
 779         }
 780         else
 781         {
 782             if (b->nr == 0)
 783             {
 784                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 785             }
 786             pt = line;
 787             while (sscanf(pt, "%s", str) == 1)
 788             {
 789                 i = b->index[b->nr];
 790                 if (i >= maxentries)
 791                 {
 792                     maxentries += 1024;
 793                     srenew(b->a, maxentries);
 794                 }
 795                 assert(b->a != NULL); // for clang analyzer
 796                 b->a[i] = strtol(str, NULL, 10)-1;
 797                 b->index[b->nr]++;
 798                 (b->nra)++;
 799                 pt = strstr(pt, str)+strlen(str);
 800             }
 801         }
 802     }
 803     gmx_fio_fclose(in);
 804
 805     for (i = 0; (i < b->nr); i++)
 806     {
 807         assert(b->a != NULL); // for clang analyzer
 808         for (j = b->index[i]; (j < b->index[i+1]); j++)
 809         {
 810             if (b->a[j] < 0)
 811             {
 812                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
 813                         b->a[j], (*grpname)[i]);
 814             }
 815         }
 816     }
 817
 818     return b;
 819 }
 820
 821 static void minstring(char *str)
 822 {
 823     int i;
 824
 825     for (i = 0; (i < (int)strlen(str)); i++)
 826     {
 827         if (str[i] == '-')
 828         {
 829             str[i] = '_';
 830         }
 831     }
 832 }
 833
 834 int find_group(char s[], int ngrps, char **grpname)
 835 {
 836     int      aa, i, n;
 837     char     string[STRLEN];
 838     gmx_bool bMultiple;
 839
 840     bMultiple = FALSE;
 841     n         = strlen(s);
 842     aa        = NOTSET;
 843     /* first look for whole name match */
 844     if (aa == NOTSET)
 845     {
 846         for (i = 0; i < ngrps; i++)
 847         {
 848             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 849             {
 850                 if (aa != NOTSET)
 851                 {
 852                     bMultiple = TRUE;
 853                 }
 854                 aa = i;
 855             }
 856         }
 857     }
 858     /* second look for first string match */
 859     if (aa == NOTSET)
 860     {
 861         for (i = 0; i < ngrps; i++)
 862         {
 863             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 864             {
 865                 if (aa != NOTSET)
 866                 {
 867                     bMultiple = TRUE;
 868                 }
 869                 aa = i;
 870             }
 871         }
 872     }
 873     /* last look for arbitrary substring match */
 874     if (aa == NOTSET)
 875     {
 876         upstring(s);
 877         minstring(s);
 878         for (i = 0; i < ngrps; i++)
 879         {
 880             strcpy(string, grpname[i]);
 881             upstring(string);
 882             minstring(string);
 883             if (strstr(string, s) != NULL)
 884             {
 885                 if (aa != NOTSET)
 886                 {
 887                     bMultiple = TRUE;
 888                 }
 889                 aa = i;
 890             }
 891         }
 892     }
 893     if (bMultiple)
 894     {
 895         printf("Error: Multiple groups '%s' selected\n", s);
 896         aa = NOTSET;
 897     }
 898     return aa;
 899 }
 900
 901 static int qgroup(int *a, int ngrps, char **grpname)
 902 {
 903     char     s[STRLEN];
 904     int      aa;
 905     gmx_bool bInRange;
 906     char    *end;
 907
 908     do
 909     {
 910         fprintf(stderr, "Select a group: ");
 911         do
 912         {
 913             if (scanf("%s", s) != 1)
 914             {
 915                 gmx_fatal(FARGS, "Cannot read from input");
 916             }
 917             trim(s); /* remove spaces */
 918         }
 919         while (strlen(s) == 0);
 920         aa = strtol(s, &end, 10);
 921         if (aa == 0 && end[0] != '\0') /* string entered */
 922         {
 923             aa = find_group(s, ngrps, grpname);
 924         }
 925         bInRange = (aa >= 0 && aa < ngrps);
 926         if (!bInRange)
 927         {
 928             printf("Error: No such group '%s'\n", s);
 929         }
 930     }
 931     while (!bInRange);
 932     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 933     *a = aa;
 934     return aa;
 935 }
 936
 937 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
 938                       int ngrps, int isize[], atom_id *index[], int grpnr[])
 939 {
 940     int i, j, gnr1;
 941
 942     if (grps->nr == 0)
 943     {
 944         gmx_fatal(FARGS, "Error: no groups in indexfile");
 945     }
 946     for (i = 0; (i < grps->nr); i++)
 947     {
 948         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
 949                 grps->index[i+1]-grps->index[i]);
 950     }
 951     for (i = 0; (i < ngrps); i++)
 952     {
 953         if (grps->nr > 1)
 954         {
 955             do
 956             {
 957                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 958                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 959                 {
 960                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
 961                 }
 962             }
 963             while ((gnr1 < 0) || (gnr1 >= grps->nr));
 964         }
 965         else
 966         {
 967             fprintf(stderr, "There is one group in the index\n");
 968             gnr1 = 0;
 969         }
 970         gnames[i] = gmx_strdup(grpname[gnr1]);
 971         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
 972         snew(index[i], isize[i]);
 973         for (j = 0; (j < isize[i]); j++)
 974         {
 975             index[i][j] = grps->a[grps->index[gnr1]+j];
 976         }
 977     }
 978 }
 979
 980 void rd_index(const char *statfile, int ngrps, int isize[],
 981               atom_id *index[], char *grpnames[])
 982 {
 983     char    **gnames;
 984     t_blocka *grps;
 985     int      *grpnr;
 986
 987     snew(grpnr, ngrps);
 988     if (!statfile)
 989     {
 990         gmx_fatal(FARGS, "No index file specified");
 991     }
 992     grps = init_index(statfile, &gnames);
 993     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
 994 }
 995
 996 void rd_index_nrs(char *statfile, int ngrps, int isize[],
 997                   atom_id *index[], char *grpnames[], int grpnr[])
 998 {
 999     char    **gnames;
1000     t_blocka *grps;
1001
1002     if (!statfile)
1003     {
1004         gmx_fatal(FARGS, "No index file specified");
1005     }
1006     grps = init_index(statfile, &gnames);
1007
1008     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1009 }
1010
1011 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1012                int isize[], atom_id *index[], char *grpnames[])
1013 {
1014     char    ***gnames;
1015     t_blocka  *grps = NULL;
1016     int       *grpnr;
1017
1018     snew(grpnr, ngrps);
1019     snew(gnames, 1);
1020     if (fnm != NULL)
1021     {
1022         grps = init_index(fnm, gnames);
1023     }
1024     else if (atoms)
1025     {
1026         snew(grps, 1);
1027         snew(grps->index, 1);
1028         analyse(atoms, grps, gnames, FALSE, FALSE);
1029     }
1030     else
1031     {
1032         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1033     }
1034
1035     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1036 }
1037
1038 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1039 {
1040     t_cluster_ndx *c;
1041     int            i;
1042
1043     snew(c, 1);
1044     c->clust     = init_index(ndx, &c->grpname);
1045     c->maxframe  = -1;
1046     for (i = 0; (i < c->clust->nra); i++)
1047     {
1048         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1049     }
1050     fprintf(fplog ? fplog : stdout,
1051             "There are %d clusters containing %d structures, highest framenr is %d\n",
1052             c->clust->nr, c->clust->nra, c->maxframe);
1053     if (debug)
1054     {
1055         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1056         for (i = 0; (i < c->clust->nra); i++)
1057         {
1058             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1059             {
1060                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1061                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1062             }
1063         }
1064     }
1065     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1066
1067     return c;
1068 }