src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "index.h"
  40
  41 #include <assert.h>
  42 #include <ctype.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/topology/atoms.h"
  49 #include "gromacs/topology/block.h"
  50 #include "gromacs/topology/invblock.h"
  51 #include "gromacs/topology/residuetypes.h"
  52 #include "gromacs/utility/arraysize.h"
  53 #include "gromacs/utility/cstringutil.h"
  54 #include "gromacs/utility/fatalerror.h"
  55 #include "gromacs/utility/futil.h"
  56 #include "gromacs/utility/smalloc.h"
  57 #include "gromacs/utility/strdb.h"
  58
  59 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  60 {
  61     char c;
  62
  63     if (bASK)
  64     {
  65         do
  66         {
  67             c = toupper(fgetc(stdin));
  68         }
  69         while ((c != 'Y') && (c != 'N'));
  70
  71         return (c == 'Y');
  72     }
  73     else
  74     {
  75         return FALSE;
  76     }
  77 }
  78
  79 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  80 {
  81     FILE *out;
  82     int   i, j, k;
  83
  84     out = gmx_ffopen(outf, "w");
  85     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  86     for (i = 0; (i < b->nr); i++)
  87     {
  88         fprintf(out, "[ %s ]", gnames[i]);
  89         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
  90         {
  91             const char sep = (k % 15 == 0 ? '\n' : ' ');
  92             fprintf(out, "%c%4d", sep, b->a[j]+1);
  93         }
  94         fprintf(out, "\n");
  95     }
  96
  97     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
  98     if (bDuplicate)
  99     {
 100         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 101         for (i = 0; (i < b->nr); i++)
 102         {
 103             fprintf(out, "[ %s_copy ]", gnames[i]);
 104             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 105             {
 106                 const char sep = (k % 15 == 0 ? '\n' : ' ');
 107                 fprintf(out, "%c%4d", sep, b->a[j]+1 + natoms);
 108             }
 109             fprintf(out, "\n");
 110         }
 111     }
 112
 113     gmx_ffclose(out);
 114 }
 115
 116 void add_grp(t_blocka *b, char ***gnames, int nra, int a[], const char *name)
 117 {
 118     int i;
 119
 120     srenew(b->index, b->nr+2);
 121     srenew(*gnames, b->nr+1);
 122     (*gnames)[b->nr] = gmx_strdup(name);
 123
 124     srenew(b->a, b->nra+nra);
 125     for (i = 0; (i < nra); i++)
 126     {
 127         b->a[b->nra++] = a[i];
 128     }
 129     b->nr++;
 130     b->index[b->nr] = b->nra;
 131 }
 132
 133 /* compare index in `a' with group in `b' at `index',
 134    when `index'<0 it is relative to end of `b' */
 135 static gmx_bool grp_cmp(t_blocka *b, int nra, int a[], int index)
 136 {
 137     int i;
 138
 139     if (index < 0)
 140     {
 141         index = b->nr-1+index;
 142     }
 143     if (index >= b->nr)
 144     {
 145         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 146     }
 147     /* compare sizes */
 148     if (nra != b->index[index+1] - b->index[index])
 149     {
 150         return FALSE;
 151     }
 152     for (i = 0; i < nra; i++)
 153     {
 154         if (a[i] != b->a[b->index[index]+i])
 155         {
 156             return FALSE;
 157         }
 158     }
 159     return TRUE;
 160 }
 161
 162 static void
 163 p_status(const char *const *restype, int nres,
 164          const char *const *typenames, int ntypes)
 165 {
 166     int   i, j;
 167     int * counter;
 168
 169     snew(counter, ntypes);
 170     for (i = 0; i < ntypes; i++)
 171     {
 172         counter[i] = 0;
 173     }
 174     for (i = 0; i < nres; i++)
 175     {
 176         for (j = 0; j < ntypes; j++)
 177         {
 178             if (!gmx_strcasecmp(restype[i], typenames[j]))
 179             {
 180                 counter[j]++;
 181             }
 182         }
 183     }
 184
 185     for (i = 0; (i < ntypes); i++)
 186     {
 187         if (counter[i] > 0)
 188         {
 189             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 190         }
 191     }
 192
 193     sfree(counter);
 194 }
 195
 196
 197 static int *
 198 mk_aid(const t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 199 /* Make an array of ints for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 200 {
 201     int     *a;
 202     int      i;
 203     int      res;
 204
 205     snew(a, atoms->nr);
 206     *nra = 0;
 207     for (i = 0; (i < atoms->nr); i++)
 208     {
 209         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 210         if (bMatch == FALSE)
 211         {
 212             res = !res;
 213         }
 214         if (res)
 215         {
 216             a[(*nra)++] = i;
 217         }
 218     }
 219
 220     return a;
 221 }
 222
 223 typedef struct {
 224     char    *rname;
 225     gmx_bool bNeg;
 226     char    *gname;
 227 } restp_t;
 228
 229 static void analyse_other(const char ** restype, const t_atoms *atoms,
 230                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 231 {
 232     restp_t *restp = NULL;
 233     char   **attp  = NULL;
 234     char    *rname, *aname;
 235     int     *aid, *aaid;
 236     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 237
 238     for (i = 0; (i < atoms->nres); i++)
 239     {
 240         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 241         {
 242             break;
 243         }
 244     }
 245     if (i < atoms->nres)
 246     {
 247         /* we have others */
 248         if (bVerb)
 249         {
 250             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 251         }
 252         for (k = 0; (k < atoms->nr); k++)
 253         {
 254             resind = atoms->atom[k].resind;
 255             rname  = *atoms->resinfo[resind].name;
 256             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 257                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 258             {
 259
 260                 for (l = 0; (l < nrestp); l++)
 261                 {
 262                     assert(restp);
 263                     if (strcmp(restp[l].rname, rname) == 0)
 264                     {
 265                         break;
 266                     }
 267                 }
 268                 if (l == nrestp)
 269                 {
 270                     srenew(restp, nrestp+1);
 271                     restp[nrestp].rname = gmx_strdup(rname);
 272                     restp[nrestp].bNeg  = FALSE;
 273                     restp[nrestp].gname = gmx_strdup(rname);
 274                     nrestp++;
 275                 }
 276             }
 277         }
 278         for (i = 0; (i < nrestp); i++)
 279         {
 280             snew(aid, atoms->nr);
 281             naid = 0;
 282             for (j = 0; (j < atoms->nr); j++)
 283             {
 284                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 285                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 286                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 287                 {
 288                     aid[naid++] = j;
 289                 }
 290             }
 291             add_grp(gb, gn, naid, aid, restp[i].gname);
 292             if (bASK)
 293             {
 294                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 295                 fflush(stdout);
 296                 if (gmx_ask_yesno(bASK))
 297                 {
 298                     natp = 0;
 299                     for (k = 0; (k < naid); k++)
 300                     {
 301                         aname = *atoms->atomname[aid[k]];
 302                         for (l = 0; (l < natp); l++)
 303                         {
 304                             if (strcmp(aname, attp[l]) == 0)
 305                             {
 306                                 break;
 307                             }
 308                         }
 309                         if (l == natp)
 310                         {
 311                             srenew(attp, ++natp);
 312                             attp[natp-1] = aname;
 313                         }
 314                     }
 315                     if (natp > 1)
 316                     {
 317                         for (l = 0; (l < natp); l++)
 318                         {
 319                             snew(aaid, naid);
 320                             naaid = 0;
 321                             for (k = 0; (k < naid); k++)
 322                             {
 323                                 aname = *atoms->atomname[aid[k]];
 324                                 if (strcmp(aname, attp[l]) == 0)
 325                                 {
 326                                     aaid[naaid++] = aid[k];
 327                                 }
 328                             }
 329                             add_grp(gb, gn, naaid, aaid, attp[l]);
 330                             sfree(aaid);
 331                         }
 332                     }
 333                     sfree(attp);
 334                     attp = NULL;
 335                 }
 336             }
 337             sfree(aid);
 338             sfree(restp[i].rname);
 339             sfree(restp[i].gname);
 340         }
 341         sfree(restp);
 342     }
 343 }
 344
 345 /*! \brief
 346  * Cata necessary to construct a single (protein) index group in
 347  * analyse_prot().
 348  */
 349 typedef struct gmx_help_make_index_group
 350 {
 351     /** The set of atom names that will be used to form this index group */
 352     const char **defining_atomnames;
 353     /** Size of the defining_atomnames array */
 354     int          num_defining_atomnames;
 355     /** Name of this index group */
 356     const char  *group_name;
 357     /** Whether the above atom names name the atoms in the group, or
 358         those not in the group */
 359     gmx_bool bTakeComplement;
 360     /** The index in wholename gives the first item in the arrays of
 361        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 362        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 363        This is comparable to using a '*' wildcard at the end of specific
 364        atom names, but that is more involved to implement...
 365      */
 366     int wholename;
 367     /** Only create this index group if it differs from the one specified in compareto,
 368        where -1 means to always create this group. */
 369     int compareto;
 370 } t_gmx_help_make_index_group;
 371
 372 static void analyse_prot(const char ** restype, const t_atoms *atoms,
 373                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 374 {
 375     /* lists of atomnames to be used in constructing index groups: */
 376     static const char *pnoh[]    = { "H", "HN" };
 377     static const char *pnodum[]  = {
 378         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 379         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 380     };
 381     static const char *calpha[]  = { "CA" };
 382     static const char *bb[]      = { "N", "CA", "C" };
 383     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 384     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 385     static const char *mch[]     = {
 386         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 387         "H1", "H2", "H3", "H", "HN"
 388     };
 389
 390     static const t_gmx_help_make_index_group constructing_data[] =
 391     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 392      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 393      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 394      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 395      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 396      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 397      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 398      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 399      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 400      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 401     const int   num_index_groups = asize(constructing_data);
 402
 403     int         n, j;
 404     int        *aid;
 405     int         nra, npres;
 406     gmx_bool    match;
 407     char        ndx_name[STRLEN], *atnm;
 408     int         i;
 409
 410     if (bVerb)
 411     {
 412         printf("Analysing Protein...\n");
 413     }
 414     snew(aid, atoms->nr);
 415
 416     /* calculate the number of protein residues */
 417     npres = 0;
 418     for (i = 0; (i < atoms->nres); i++)
 419     {
 420         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 421         {
 422             npres++;
 423         }
 424     }
 425     /* find matching or complement atoms */
 426     for (i = 0; (i < (int)num_index_groups); i++)
 427     {
 428         nra = 0;
 429         for (n = 0; (n < atoms->nr); n++)
 430         {
 431             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 432             {
 433                 match = FALSE;
 434                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 435                 {
 436                     /* skip digits at beginning of atomname, e.g. 1H */
 437                     atnm = *atoms->atomname[n];
 438                     while (isdigit(atnm[0]))
 439                     {
 440                         atnm++;
 441                     }
 442                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 443                     {
 444                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 445                         {
 446                             match = TRUE;
 447                         }
 448                     }
 449                     else
 450                     {
 451                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 452                         {
 453                             match = TRUE;
 454                         }
 455                     }
 456                 }
 457                 if (constructing_data[i].bTakeComplement != match)
 458                 {
 459                     aid[nra++] = n;
 460                 }
 461             }
 462         }
 463         /* if we want to add this group always or it differs from previous
 464            group, add it: */
 465         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 466         {
 467             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 468         }
 469     }
 470
 471     if (bASK)
 472     {
 473         for (i = 0; (i < (int)num_index_groups); i++)
 474         {
 475             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 476             if (gmx_ask_yesno(bASK))
 477             {
 478                 int resind;
 479                 nra = 0;
 480                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 481                 {
 482                     resind = atoms->atom[n].resind;
 483                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 484                     {
 485                         match = FALSE;
 486                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 487                         {
 488                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 489                             {
 490                                 match = TRUE;
 491                             }
 492                         }
 493                         if (constructing_data[i].bTakeComplement != match)
 494                         {
 495                             aid[nra++] = n;
 496                         }
 497                     }
 498                     /* copy the residuename to the tail of the groupname */
 499                     if (nra > 0)
 500                     {
 501                         t_resinfo *ri;
 502                         ri = &atoms->resinfo[resind];
 503                         sprintf(ndx_name, "%s_%s%d%c",
 504                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 505                         add_grp(gb, gn, nra, aid, ndx_name);
 506                         nra = 0;
 507                     }
 508                 }
 509             }
 510         }
 511         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 512         if (gmx_ask_yesno(bASK))
 513         {
 514             /* Make swap sidechain C=O index */
 515             int resind, hold;
 516             nra = 0;
 517             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 518             {
 519                 resind = atoms->atom[n].resind;
 520                 hold   = -1;
 521                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 522                 {
 523                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 524                     {
 525                         aid[nra++] = n;
 526                         hold       = nra;
 527                         nra       += 2;
 528                     }
 529                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 530                     {
 531                         if (hold == -1)
 532                         {
 533                             gmx_incons("Atom naming problem");
 534                         }
 535                         aid[hold] = n;
 536                     }
 537                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 538                     {
 539                         if (hold == -1)
 540                         {
 541                             gmx_incons("Atom naming problem");
 542                         }
 543                         aid[hold+1] = n;
 544                     }
 545                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 546                     {
 547                         if (hold == -1)
 548                         {
 549                             gmx_incons("Atom naming problem");
 550                         }
 551                         aid[hold+1] = n;
 552                     }
 553                     else
 554                     {
 555                         aid[nra++] = n;
 556                     }
 557                 }
 558             }
 559             /* copy the residuename to the tail of the groupname */
 560             if (nra > 0)
 561             {
 562                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 563             }
 564         }
 565     }
 566     sfree(aid);
 567 }
 568
 569
 570 void analyse(const t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 571 {
 572     gmx_residuetype_t*rt = NULL;
 573     char             *resnm;
 574     int              *aid;
 575     const char    **  restype;
 576     int               nra;
 577     int               i, k;
 578     int               ntypes;
 579     char           ** p_typename;
 580     int               iwater, iion;
 581     int               nwater, nion;
 582     int               found;
 583
 584     if (bVerb)
 585     {
 586         printf("Analysing residue names:\n");
 587     }
 588     /* Create system group, every single atom */
 589     snew(aid, atoms->nr);
 590     for (i = 0; i < atoms->nr; i++)
 591     {
 592         aid[i] = i;
 593     }
 594     add_grp(gb, gn, atoms->nr, aid, "System");
 595     sfree(aid);
 596
 597     /* For every residue, get a pointer to the residue type name */
 598     gmx_residuetype_init(&rt);
 599     assert(rt);
 600
 601     snew(restype, atoms->nres);
 602     ntypes     = 0;
 603     p_typename = NULL;
 604     if (atoms->nres > 0)
 605     {
 606         int i = 0;
 607
 608         resnm = *atoms->resinfo[i].name;
 609         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 610         snew(p_typename, ntypes+1);
 611         p_typename[ntypes] = gmx_strdup(restype[i]);
 612         ntypes++;
 613
 614         for (i = 1; i < atoms->nres; i++)
 615         {
 616             resnm = *atoms->resinfo[i].name;
 617             gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 618
 619             /* Note that this does not lead to a N*N loop, but N*K, where
 620              * K is the number of residue _types_, which is small and independent of N.
 621              */
 622             found = 0;
 623             for (k = 0; k < ntypes && !found; k++)
 624             {
 625                 found = !strcmp(restype[i], p_typename[k]);
 626             }
 627             if (!found)
 628             {
 629                 srenew(p_typename, ntypes+1);
 630                 p_typename[ntypes] = gmx_strdup(restype[i]);
 631                 ntypes++;
 632             }
 633         }
 634     }
 635
 636     if (bVerb)
 637     {
 638         p_status(restype, atoms->nres, p_typename, ntypes);
 639     }
 640
 641     for (k = 0; k < ntypes; k++)
 642     {
 643         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 644
 645         /* Check for special types to do fancy stuff with */
 646
 647         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 648         {
 649             sfree(aid);
 650             /* PROTEIN */
 651             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 652
 653             /* Create a Non-Protein group */
 654             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 655             if ((nra > 0) && (nra < atoms->nr))
 656             {
 657                 add_grp(gb, gn, nra, aid, "non-Protein");
 658             }
 659             sfree(aid);
 660         }
 661         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 662         {
 663             add_grp(gb, gn, nra, aid, p_typename[k]);
 664             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 665             add_grp(gb, gn, nra, aid, "SOL");
 666
 667             sfree(aid);
 668
 669             /* Solvent, create a negated group too */
 670             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 671             if ((nra > 0) && (nra < atoms->nr))
 672             {
 673                 add_grp(gb, gn, nra, aid, "non-Water");
 674             }
 675             sfree(aid);
 676         }
 677         else if (nra > 0)
 678         {
 679             /* Other groups */
 680             add_grp(gb, gn, nra, aid, p_typename[k]);
 681             sfree(aid);
 682             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 683         }
 684         sfree(p_typename[k]);
 685     }
 686
 687     sfree(p_typename);
 688     sfree(restype);
 689     gmx_residuetype_destroy(rt);
 690
 691     /* Create a merged water_and_ions group */
 692     iwater = -1;
 693     iion   = -1;
 694     nwater = 0;
 695     nion   = 0;
 696
 697     for (i = 0; i < gb->nr; i++)
 698     {
 699         if (!gmx_strcasecmp((*gn)[i], "Water"))
 700         {
 701             iwater = i;
 702             nwater = gb->index[i+1]-gb->index[i];
 703         }
 704         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 705         {
 706             iion = i;
 707             nion = gb->index[i+1]-gb->index[i];
 708         }
 709     }
 710
 711     if (nwater > 0 && nion > 0)
 712     {
 713         srenew(gb->index, gb->nr+2);
 714         srenew(*gn, gb->nr+1);
 715         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 716         srenew(gb->a, gb->nra+nwater+nion);
 717         if (nwater > 0)
 718         {
 719             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 720             {
 721                 gb->a[gb->nra++] = gb->a[i];
 722             }
 723         }
 724         if (nion > 0)
 725         {
 726             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 727             {
 728                 gb->a[gb->nra++] = gb->a[i];
 729             }
 730         }
 731         gb->nr++;
 732         gb->index[gb->nr] = gb->nra;
 733     }
 734 }
 735
 736
 737 void check_index(char *gname, int n, int index[], char *traj, int natoms)
 738 {
 739     int i;
 740
 741     for (i = 0; i < n; i++)
 742     {
 743         if (index[i] >= natoms)
 744         {
 745             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 746                       gname ? gname : "Index", i+1, index[i]+1,
 747                       traj ? traj : "the trajectory", natoms);
 748         }
 749         else if (index[i] < 0)
 750         {
 751             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 752                       gname ? gname : "Index", i+1, index[i]+1);
 753         }
 754     }
 755 }
 756
 757 t_blocka *init_index(const char *gfile, char ***grpname)
 758 {
 759     FILE      *in;
 760     t_blocka  *b;
 761     int        maxentries;
 762     int        i, j;
 763     char       line[STRLEN], *pt, str[STRLEN];
 764
 765     in = gmx_ffopen(gfile, "r");
 766     snew(b, 1);
 767     b->nr      = 0;
 768     b->index   = NULL;
 769     b->nra     = 0;
 770     b->a       = NULL;
 771     *grpname   = NULL;
 772     maxentries = 0;
 773     while (get_a_line(in, line, STRLEN))
 774     {
 775         if (get_header(line, str))
 776         {
 777             b->nr++;
 778             srenew(b->index, b->nr+1);
 779             srenew(*grpname, b->nr);
 780             if (b->nr == 1)
 781             {
 782                 b->index[0] = 0;
 783             }
 784             b->index[b->nr]     = b->index[b->nr-1];
 785             (*grpname)[b->nr-1] = gmx_strdup(str);
 786         }
 787         else
 788         {
 789             if (b->nr == 0)
 790             {
 791                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 792             }
 793             pt = line;
 794             while (sscanf(pt, "%s", str) == 1)
 795             {
 796                 i = b->index[b->nr];
 797                 if (i >= maxentries)
 798                 {
 799                     maxentries += 1024;
 800                     srenew(b->a, maxentries);
 801                 }
 802                 assert(b->a != NULL); // for clang analyzer
 803                 b->a[i] = strtol(str, NULL, 10)-1;
 804                 b->index[b->nr]++;
 805                 (b->nra)++;
 806                 pt = strstr(pt, str)+strlen(str);
 807             }
 808         }
 809     }
 810     gmx_ffclose(in);
 811
 812     for (i = 0; (i < b->nr); i++)
 813     {
 814         assert(b->a != NULL); // for clang analyzer
 815         for (j = b->index[i]; (j < b->index[i+1]); j++)
 816         {
 817             if (b->a[j] < 0)
 818             {
 819                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
 820                         b->a[j], (*grpname)[i]);
 821             }
 822         }
 823     }
 824
 825     return b;
 826 }
 827
 828 static void minstring(char *str)
 829 {
 830     int i;
 831
 832     for (i = 0; (i < (int)strlen(str)); i++)
 833     {
 834         if (str[i] == '-')
 835         {
 836             str[i] = '_';
 837         }
 838     }
 839 }
 840
 841 int find_group(const char *s, int ngrps, char **grpname)
 842 {
 843     int      aa, i, n;
 844     char     string[STRLEN];
 845     gmx_bool bMultiple;
 846     bMultiple = FALSE;
 847     n         = strlen(s);
 848     aa        = -1;
 849     /* first look for whole name match */
 850     {
 851         for (i = 0; i < ngrps; i++)
 852         {
 853             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 854             {
 855                 if (aa != -1)
 856                 {
 857                     bMultiple = TRUE;
 858                 }
 859                 aa = i;
 860             }
 861         }
 862     }
 863     /* second look for first string match */
 864     if (aa == -1)
 865     {
 866         for (i = 0; i < ngrps; i++)
 867         {
 868             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 869             {
 870                 if (aa != -1)
 871                 {
 872                     bMultiple = TRUE;
 873                 }
 874                 aa = i;
 875             }
 876         }
 877     }
 878     /* last look for arbitrary substring match */
 879     if (aa == -1)
 880     {
 881         char key[STRLEN];
 882         strncpy(key, s, sizeof(key)-1);
 883         key[STRLEN-1] = '\0';
 884         upstring(key);
 885         minstring(key);
 886         for (i = 0; i < ngrps; i++)
 887         {
 888             strcpy(string, grpname[i]);
 889             upstring(string);
 890             minstring(string);
 891             if (strstr(string, key) != NULL)
 892             {
 893                 if (aa != -1)
 894                 {
 895                     bMultiple = TRUE;
 896                 }
 897                 aa = i;
 898             }
 899         }
 900     }
 901     if (bMultiple)
 902     {
 903         printf("Error: Multiple groups '%s' selected\n", s);
 904         aa = -1;
 905     }
 906     return aa;
 907 }
 908
 909 static int qgroup(int *a, int ngrps, char **grpname)
 910 {
 911     char     s[STRLEN];
 912     int      aa;
 913     gmx_bool bInRange;
 914     char    *end;
 915
 916     do
 917     {
 918         fprintf(stderr, "Select a group: ");
 919         do
 920         {
 921             if (scanf("%s", s) != 1)
 922             {
 923                 gmx_fatal(FARGS, "Cannot read from input");
 924             }
 925             trim(s); /* remove spaces */
 926         }
 927         while (strlen(s) == 0);
 928         aa = strtol(s, &end, 10);
 929         if (aa == 0 && end[0] != '\0') /* string entered */
 930         {
 931             aa = find_group(s, ngrps, grpname);
 932         }
 933         bInRange = (aa >= 0 && aa < ngrps);
 934         if (!bInRange)
 935         {
 936             printf("Error: No such group '%s'\n", s);
 937         }
 938     }
 939     while (!bInRange);
 940     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 941     *a = aa;
 942     return aa;
 943 }
 944
 945 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
 946                       int ngrps, int isize[], int *index[], int grpnr[])
 947 {
 948     int i, j, gnr1;
 949
 950     if (grps->nr == 0)
 951     {
 952         gmx_fatal(FARGS, "Error: no groups in indexfile");
 953     }
 954     for (i = 0; (i < grps->nr); i++)
 955     {
 956         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
 957                 grps->index[i+1]-grps->index[i]);
 958     }
 959     for (i = 0; (i < ngrps); i++)
 960     {
 961         if (grps->nr > 1)
 962         {
 963             do
 964             {
 965                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 966                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 967                 {
 968                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
 969                 }
 970             }
 971             while ((gnr1 < 0) || (gnr1 >= grps->nr));
 972         }
 973         else
 974         {
 975             fprintf(stderr, "There is one group in the index\n");
 976             gnr1 = 0;
 977         }
 978         gnames[i] = gmx_strdup(grpname[gnr1]);
 979         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
 980         snew(index[i], isize[i]);
 981         for (j = 0; (j < isize[i]); j++)
 982         {
 983             index[i][j] = grps->a[grps->index[gnr1]+j];
 984         }
 985     }
 986 }
 987
 988 void rd_index(const char *statfile, int ngrps, int isize[],
 989               int *index[], char *grpnames[])
 990 {
 991     char    **gnames;
 992     t_blocka *grps;
 993     int      *grpnr;
 994
 995     snew(grpnr, ngrps);
 996     if (!statfile)
 997     {
 998         gmx_fatal(FARGS, "No index file specified");
 999     }
1000     grps = init_index(statfile, &gnames);
1001     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1002 }
1003
1004 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1005                   int *index[], char *grpnames[], int grpnr[])
1006 {
1007     char    **gnames;
1008     t_blocka *grps;
1009
1010     if (!statfile)
1011     {
1012         gmx_fatal(FARGS, "No index file specified");
1013     }
1014     grps = init_index(statfile, &gnames);
1015
1016     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1017 }
1018
1019 void get_index(const t_atoms *atoms, const char *fnm, int ngrps,
1020                int isize[], int *index[], char *grpnames[])
1021 {
1022     char    ***gnames;
1023     t_blocka  *grps = NULL;
1024     int       *grpnr;
1025
1026     snew(grpnr, ngrps);
1027     snew(gnames, 1);
1028     if (fnm != NULL)
1029     {
1030         grps = init_index(fnm, gnames);
1031     }
1032     else if (atoms)
1033     {
1034         snew(grps, 1);
1035         snew(grps->index, 1);
1036         analyse(atoms, grps, gnames, FALSE, FALSE);
1037     }
1038     else
1039     {
1040         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1041     }
1042
1043     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1044 }
1045
1046 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1047 {
1048     t_cluster_ndx *c;
1049     int            i;
1050
1051     snew(c, 1);
1052     c->clust     = init_index(ndx, &c->grpname);
1053     c->maxframe  = -1;
1054     for (i = 0; (i < c->clust->nra); i++)
1055     {
1056         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1057     }
1058     fprintf(fplog ? fplog : stdout,
1059             "There are %d clusters containing %d structures, highest framenr is %d\n",
1060             c->clust->nr, c->clust->nra, c->maxframe);
1061     if (debug)
1062     {
1063         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1064         for (i = 0; (i < c->clust->nra); i++)
1065         {
1066             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1067             {
1068                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1069                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1070             }
1071         }
1072     }
1073     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1074
1075     return c;
1076 }