src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "gromacs/topology/index.h"
  40
  41 #include "config.h"
  42
  43 #include <assert.h>
  44 #include <ctype.h>
  45 #include <stdlib.h>
  46 #include <string.h>
  47
  48 #include <algorithm>
  49
  50 #include "gromacs/legacyheaders/macros.h"
  51 #include "gromacs/legacyheaders/txtdump.h"
  52
  53 #include "gromacs/fileio/gmxfio.h"
  54 #include "gromacs/fileio/strdb.h"
  55 #include "gromacs/topology/atoms.h"
  56 #include "gromacs/topology/block.h"
  57 #include "gromacs/topology/invblock.h"
  58 #include "gromacs/topology/residuetypes.h"
  59 #include "gromacs/utility/cstringutil.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/smalloc.h"
  62
  63 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  64 {
  65     char c;
  66
  67     if (bASK)
  68     {
  69         do
  70         {
  71             c = toupper(fgetc(stdin));
  72         }
  73         while ((c != 'Y') && (c != 'N'));
  74
  75         return (c == 'Y');
  76     }
  77     else
  78     {
  79         return FALSE;
  80     }
  81 }
  82
  83 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  84 {
  85     FILE *out;
  86     int   i, j, k;
  87
  88     out = gmx_fio_fopen(outf, "w");
  89     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  90     for (i = 0; (i < b->nr); i++)
  91     {
  92         fprintf(out, "[ %s ]\n", gnames[i]);
  93         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
  94         {
  95             fprintf(out, "%4d ", b->a[j]+1);
  96             if ((k % 15) == 14)
  97             {
  98                 fprintf(out, "\n");
  99             }
 100         }
 101         fprintf(out, "\n");
 102     }
 103
 104     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
 105     if (bDuplicate)
 106     {
 107         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 108         for (i = 0; (i < b->nr); i++)
 109         {
 110             fprintf(out, "[ %s_copy ]\n", gnames[i]);
 111             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 112             {
 113                 fprintf(out, "%4d ", b->a[j]+1 + natoms );
 114                 if ((k % 15) == 14)
 115                 {
 116                     fprintf(out, "\n");
 117                 }
 118             }
 119             fprintf(out, "\n");
 120         }
 121     }
 122
 123     gmx_fio_fclose(out);
 124 }
 125
 126 void add_grp(t_blocka *b, char ***gnames, int nra, atom_id a[], const char *name)
 127 {
 128     int i;
 129
 130     srenew(b->index, b->nr+2);
 131     srenew(*gnames, b->nr+1);
 132     (*gnames)[b->nr] = gmx_strdup(name);
 133
 134     srenew(b->a, b->nra+nra);
 135     for (i = 0; (i < nra); i++)
 136     {
 137         b->a[b->nra++] = a[i];
 138     }
 139     b->nr++;
 140     b->index[b->nr] = b->nra;
 141 }
 142
 143 /* compare index in `a' with group in `b' at `index',
 144    when `index'<0 it is relative to end of `b' */
 145 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 146 {
 147     int i;
 148
 149     if (index < 0)
 150     {
 151         index = b->nr-1+index;
 152     }
 153     if (index >= b->nr)
 154     {
 155         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 156     }
 157     /* compare sizes */
 158     if (nra != b->index[index+1] - b->index[index])
 159     {
 160         return FALSE;
 161     }
 162     for (i = 0; i < nra; i++)
 163     {
 164         if (a[i] != b->a[b->index[index]+i])
 165         {
 166             return FALSE;
 167         }
 168     }
 169     return TRUE;
 170 }
 171
 172 static void
 173 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 174 {
 175     int   i, j;
 176     int * counter;
 177
 178     snew(counter, ntypes);
 179     for (i = 0; i < ntypes; i++)
 180     {
 181         counter[i] = 0;
 182     }
 183     for (i = 0; i < nres; i++)
 184     {
 185         for (j = 0; j < ntypes; j++)
 186         {
 187             if (!gmx_strcasecmp(restype[i], typenames[j]))
 188             {
 189                 counter[j]++;
 190             }
 191         }
 192     }
 193
 194     for (i = 0; (i < ntypes); i++)
 195     {
 196         if (counter[i] > 0)
 197         {
 198             printf("There are: %5d %10s residues\n", counter[i], typenames[i]);
 199         }
 200     }
 201
 202     sfree(counter);
 203 }
 204
 205
 206 static atom_id *
 207 mk_aid(t_atoms *atoms, const char ** restype, const char * typestring, int *nra, gmx_bool bMatch)
 208 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 209 {
 210     atom_id *a;
 211     int      i;
 212     int      res;
 213
 214     snew(a, atoms->nr);
 215     *nra = 0;
 216     for (i = 0; (i < atoms->nr); i++)
 217     {
 218         res = !gmx_strcasecmp(restype[atoms->atom[i].resind], typestring);
 219         if (bMatch == FALSE)
 220         {
 221             res = !res;
 222         }
 223         if (res)
 224         {
 225             a[(*nra)++] = i;
 226         }
 227     }
 228
 229     return a;
 230 }
 231
 232 typedef struct {
 233     char    *rname;
 234     gmx_bool bNeg;
 235     char    *gname;
 236 } restp_t;
 237
 238 static void analyse_other(const char ** restype, t_atoms *atoms,
 239                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 240 {
 241     restp_t *restp = NULL;
 242     char   **attp  = NULL;
 243     char    *rname, *aname;
 244     atom_id *other_ndx, *aid, *aaid;
 245     int      i, j, k, l, resind, naid, naaid, natp, nrestp = 0;
 246
 247     for (i = 0; (i < atoms->nres); i++)
 248     {
 249         if (gmx_strcasecmp(restype[i], "Protein") && gmx_strcasecmp(restype[i], "DNA") && gmx_strcasecmp(restype[i], "RNA") && gmx_strcasecmp(restype[i], "Water"))
 250         {
 251             break;
 252         }
 253     }
 254     if (i < atoms->nres)
 255     {
 256         /* we have others */
 257         if (bVerb)
 258         {
 259             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 260         }
 261         snew(other_ndx, atoms->nr);
 262         for (k = 0; (k < atoms->nr); k++)
 263         {
 264             resind = atoms->atom[k].resind;
 265             rname  = *atoms->resinfo[resind].name;
 266             if (gmx_strcasecmp(restype[resind], "Protein") && gmx_strcasecmp(restype[resind], "DNA") &&
 267                 gmx_strcasecmp(restype[resind], "RNA") && gmx_strcasecmp(restype[resind], "Water"))
 268             {
 269
 270                 for (l = 0; (l < nrestp); l++)
 271                 {
 272                     if (strcmp(restp[l].rname, rname) == 0)
 273                     {
 274                         break;
 275                     }
 276                 }
 277                 if (l == nrestp)
 278                 {
 279                     srenew(restp, nrestp+1);
 280                     restp[nrestp].rname = gmx_strdup(rname);
 281                     restp[nrestp].bNeg  = FALSE;
 282                     restp[nrestp].gname = gmx_strdup(rname);
 283                     nrestp++;
 284
 285                 }
 286             }
 287         }
 288         for (i = 0; (i < nrestp); i++)
 289         {
 290             snew(aid, atoms->nr);
 291             naid = 0;
 292             for (j = 0; (j < atoms->nr); j++)
 293             {
 294                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 295                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 296                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 297                 {
 298                     aid[naid++] = j;
 299                 }
 300             }
 301             add_grp(gb, gn, naid, aid, restp[i].gname);
 302             if (bASK)
 303             {
 304                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 305                 fflush(stdout);
 306                 if (gmx_ask_yesno(bASK))
 307                 {
 308                     natp = 0;
 309                     for (k = 0; (k < naid); k++)
 310                     {
 311                         aname = *atoms->atomname[aid[k]];
 312                         for (l = 0; (l < natp); l++)
 313                         {
 314                             if (strcmp(aname, attp[l]) == 0)
 315                             {
 316                                 break;
 317                             }
 318                         }
 319                         if (l == natp)
 320                         {
 321                             srenew(attp, ++natp);
 322                             attp[natp-1] = aname;
 323                         }
 324                     }
 325                     if (natp > 1)
 326                     {
 327                         for (l = 0; (l < natp); l++)
 328                         {
 329                             snew(aaid, naid);
 330                             naaid = 0;
 331                             for (k = 0; (k < naid); k++)
 332                             {
 333                                 aname = *atoms->atomname[aid[k]];
 334                                 if (strcmp(aname, attp[l]) == 0)
 335                                 {
 336                                     aaid[naaid++] = aid[k];
 337                                 }
 338                             }
 339                             add_grp(gb, gn, naaid, aaid, attp[l]);
 340                             sfree(aaid);
 341                         }
 342                     }
 343                     sfree(attp);
 344                     attp = NULL;
 345                 }
 346                 sfree(aid);
 347             }
 348         }
 349         sfree(other_ndx);
 350     }
 351 }
 352
 353 /*! \brief
 354  * Cata necessary to construct a single (protein) index group in
 355  * analyse_prot().
 356  */
 357 typedef struct gmx_help_make_index_group
 358 {
 359     /** The set of atom names that will be used to form this index group */
 360     const char **defining_atomnames;
 361     /** Size of the defining_atomnames array */
 362     int          num_defining_atomnames;
 363     /** Name of this index group */
 364     const char  *group_name;
 365     /** Whether the above atom names name the atoms in the group, or
 366         those not in the group */
 367     gmx_bool bTakeComplement;
 368     /** The index in wholename gives the first item in the arrays of
 369        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 370        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 371        This is comparable to using a '*' wildcard at the end of specific
 372        atom names, but that is more involved to implement...
 373      */
 374     int wholename;
 375     /** Only create this index group if it differs from the one specified in compareto,
 376        where -1 means to always create this group. */
 377     int compareto;
 378 } t_gmx_help_make_index_group;
 379
 380 static void analyse_prot(const char ** restype, t_atoms *atoms,
 381                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 382 {
 383     /* lists of atomnames to be used in constructing index groups: */
 384     static const char *pnoh[]    = { "H", "HN" };
 385     static const char *pnodum[]  = {
 386         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 387         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 388     };
 389     static const char *calpha[]  = { "CA" };
 390     static const char *bb[]      = { "N", "CA", "C" };
 391     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 392     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 393     static const char *mch[]     = {
 394         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 395         "H1", "H2", "H3", "H", "HN"
 396     };
 397
 398     static const t_gmx_help_make_index_group constructing_data[] =
 399     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 400      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 401      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 402      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 403      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 404      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 405      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 406      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 407      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 408      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 409     const int   num_index_groups = asize(constructing_data);
 410
 411     int         n, j;
 412     atom_id    *aid;
 413     int         nra, npres;
 414     gmx_bool    match;
 415     char        ndx_name[STRLEN], *atnm;
 416     int         i;
 417
 418     if (bVerb)
 419     {
 420         printf("Analysing Protein...\n");
 421     }
 422     snew(aid, atoms->nr);
 423
 424     /* calculate the number of protein residues */
 425     npres = 0;
 426     for (i = 0; (i < atoms->nres); i++)
 427     {
 428         if (0 == gmx_strcasecmp(restype[i], "Protein"))
 429         {
 430             npres++;
 431         }
 432     }
 433     /* find matching or complement atoms */
 434     for (i = 0; (i < (int)num_index_groups); i++)
 435     {
 436         nra = 0;
 437         for (n = 0; (n < atoms->nr); n++)
 438         {
 439             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind], "Protein"))
 440             {
 441                 match = FALSE;
 442                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 443                 {
 444                     /* skip digits at beginning of atomname, e.g. 1H */
 445                     atnm = *atoms->atomname[n];
 446                     while (isdigit(atnm[0]))
 447                     {
 448                         atnm++;
 449                     }
 450                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 451                     {
 452                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 453                         {
 454                             match = TRUE;
 455                         }
 456                     }
 457                     else
 458                     {
 459                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 460                         {
 461                             match = TRUE;
 462                         }
 463                     }
 464                 }
 465                 if (constructing_data[i].bTakeComplement != match)
 466                 {
 467                     aid[nra++] = n;
 468                 }
 469             }
 470         }
 471         /* if we want to add this group always or it differs from previous
 472            group, add it: */
 473         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, nra, aid, constructing_data[i].compareto-i) )
 474         {
 475             add_grp(gb, gn, nra, aid, constructing_data[i].group_name);
 476         }
 477     }
 478
 479     if (bASK)
 480     {
 481         for (i = 0; (i < (int)num_index_groups); i++)
 482         {
 483             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 484             if (gmx_ask_yesno(bASK))
 485             {
 486                 int resind;
 487                 nra = 0;
 488                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 489                 {
 490                     resind = atoms->atom[n].resind;
 491                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 492                     {
 493                         match = FALSE;
 494                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 495                         {
 496                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 497                             {
 498                                 match = TRUE;
 499                             }
 500                         }
 501                         if (constructing_data[i].bTakeComplement != match)
 502                         {
 503                             aid[nra++] = n;
 504                         }
 505                     }
 506                     /* copy the residuename to the tail of the groupname */
 507                     if (nra > 0)
 508                     {
 509                         t_resinfo *ri;
 510                         ri = &atoms->resinfo[resind];
 511                         sprintf(ndx_name, "%s_%s%d%c",
 512                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 513                         add_grp(gb, gn, nra, aid, ndx_name);
 514                         nra = 0;
 515                     }
 516                 }
 517             }
 518         }
 519         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 520         if (gmx_ask_yesno(bASK))
 521         {
 522             /* Make swap sidechain C=O index */
 523             int resind, hold;
 524             nra = 0;
 525             for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 526             {
 527                 resind = atoms->atom[n].resind;
 528                 hold   = -1;
 529                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 530                 {
 531                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 532                     {
 533                         aid[nra++] = n;
 534                         hold       = nra;
 535                         nra       += 2;
 536                     }
 537                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 538                     {
 539                         if (hold == -1)
 540                         {
 541                             gmx_incons("Atom naming problem");
 542                         }
 543                         aid[hold] = n;
 544                     }
 545                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 546                     {
 547                         if (hold == -1)
 548                         {
 549                             gmx_incons("Atom naming problem");
 550                         }
 551                         aid[hold+1] = n;
 552                     }
 553                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 554                     {
 555                         if (hold == -1)
 556                         {
 557                             gmx_incons("Atom naming problem");
 558                         }
 559                         aid[hold+1] = n;
 560                     }
 561                     else
 562                     {
 563                         aid[nra++] = n;
 564                     }
 565                 }
 566             }
 567             /* copy the residuename to the tail of the groupname */
 568             if (nra > 0)
 569             {
 570                 add_grp(gb, gn, nra, aid, "SwapSC-CO");
 571             }
 572         }
 573     }
 574     sfree(aid);
 575 }
 576
 577
 578 void analyse(t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 579 {
 580     gmx_residuetype_t*rt = NULL;
 581     char             *resnm;
 582     atom_id          *aid;
 583     const char    **  restype;
 584     int               nra;
 585     int               i, k;
 586     int               ntypes;
 587     const char     ** p_typename;
 588     int               iwater, iion;
 589     int               nwater, nion;
 590     int               found;
 591
 592     if (bVerb)
 593     {
 594         printf("Analysing residue names:\n");
 595     }
 596     /* Create system group, every single atom */
 597     snew(aid, atoms->nr);
 598     for (i = 0; i < atoms->nr; i++)
 599     {
 600         aid[i] = i;
 601     }
 602     add_grp(gb, gn, atoms->nr, aid, "System");
 603     sfree(aid);
 604
 605     /* For every residue, get a pointer to the residue type name */
 606     gmx_residuetype_init(&rt);
 607     assert(rt);
 608
 609     snew(restype, atoms->nres);
 610     ntypes     = 0;
 611     p_typename = NULL;
 612     for (i = 0; i < atoms->nres; i++)
 613     {
 614         resnm = *atoms->resinfo[i].name;
 615         gmx_residuetype_get_type(rt, resnm, &(restype[i]));
 616
 617         /* Note that this does not lead to a N*N loop, but N*K, where
 618          * K is the number of residue _types_, which is small and independent of N.
 619          */
 620         found = 0;
 621         for (k = 0; k < ntypes && !found; k++)
 622         {
 623             assert(p_typename != NULL);
 624             found = !strcmp(restype[i], p_typename[k]);
 625         }
 626         if (!found)
 627         {
 628             srenew(p_typename, ntypes+1);
 629             p_typename[ntypes++] = gmx_strdup(restype[i]);
 630         }
 631     }
 632
 633     if (bVerb)
 634     {
 635         p_status(restype, atoms->nres, p_typename, ntypes);
 636     }
 637
 638     for (k = 0; k < ntypes; k++)
 639     {
 640         aid = mk_aid(atoms, restype, p_typename[k], &nra, TRUE);
 641
 642         /* Check for special types to do fancy stuff with */
 643
 644         if (!gmx_strcasecmp(p_typename[k], "Protein") && nra > 0)
 645         {
 646             sfree(aid);
 647             /* PROTEIN */
 648             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 649
 650             /* Create a Non-Protein group */
 651             aid = mk_aid(atoms, restype, "Protein", &nra, FALSE);
 652             if ((nra > 0) && (nra < atoms->nr))
 653             {
 654                 add_grp(gb, gn, nra, aid, "non-Protein");
 655             }
 656             sfree(aid);
 657         }
 658         else if (!gmx_strcasecmp(p_typename[k], "Water") && nra > 0)
 659         {
 660             add_grp(gb, gn, nra, aid, p_typename[k]);
 661             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 662             add_grp(gb, gn, nra, aid, "SOL");
 663
 664             sfree(aid);
 665
 666             /* Solvent, create a negated group too */
 667             aid = mk_aid(atoms, restype, "Water", &nra, FALSE);
 668             if ((nra > 0) && (nra < atoms->nr))
 669             {
 670                 add_grp(gb, gn, nra, aid, "non-Water");
 671             }
 672             sfree(aid);
 673         }
 674         else if (nra > 0)
 675         {
 676             /* Other groups */
 677             add_grp(gb, gn, nra, aid, p_typename[k]);
 678             sfree(aid);
 679             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 680         }
 681     }
 682
 683     sfree(p_typename);
 684     sfree(restype);
 685     gmx_residuetype_destroy(rt);
 686
 687     /* Create a merged water_and_ions group */
 688     iwater = -1;
 689     iion   = -1;
 690     nwater = 0;
 691     nion   = 0;
 692
 693     for (i = 0; i < gb->nr; i++)
 694     {
 695         if (!gmx_strcasecmp((*gn)[i], "Water"))
 696         {
 697             iwater = i;
 698             nwater = gb->index[i+1]-gb->index[i];
 699         }
 700         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 701         {
 702             iion = i;
 703             nion = gb->index[i+1]-gb->index[i];
 704         }
 705     }
 706
 707     if (nwater > 0 && nion > 0)
 708     {
 709         srenew(gb->index, gb->nr+2);
 710         srenew(*gn, gb->nr+1);
 711         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 712         srenew(gb->a, gb->nra+nwater+nion);
 713         if (nwater > 0)
 714         {
 715             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 716             {
 717                 gb->a[gb->nra++] = gb->a[i];
 718             }
 719         }
 720         if (nion > 0)
 721         {
 722             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 723             {
 724                 gb->a[gb->nra++] = gb->a[i];
 725             }
 726         }
 727         gb->nr++;
 728         gb->index[gb->nr] = gb->nra;
 729     }
 730 }
 731
 732
 733 void check_index(char *gname, int n, atom_id index[], char *traj, int natoms)
 734 {
 735     int i;
 736
 737     for (i = 0; i < n; i++)
 738     {
 739         if (index[i] >= natoms)
 740         {
 741             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 742                       gname ? gname : "Index", i+1, index[i]+1,
 743                       traj ? traj : "the trajectory", natoms);
 744         }
 745         else if (index[i] < 0)
 746         {
 747             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 748                       gname ? gname : "Index", i+1, index[i]+1);
 749         }
 750     }
 751 }
 752
 753 t_blocka *init_index(const char *gfile, char ***grpname)
 754 {
 755     FILE      *in;
 756     t_blocka  *b;
 757     int        maxentries;
 758     int        i, j;
 759     char       line[STRLEN], *pt, str[STRLEN];
 760
 761     in = gmx_fio_fopen(gfile, "r");
 762     snew(b, 1);
 763     b->nr      = 0;
 764     b->index   = NULL;
 765     b->nra     = 0;
 766     b->a       = NULL;
 767     *grpname   = NULL;
 768     maxentries = 0;
 769     while (get_a_line(in, line, STRLEN))
 770     {
 771         if (get_header(line, str))
 772         {
 773             b->nr++;
 774             srenew(b->index, b->nr+1);
 775             srenew(*grpname, b->nr);
 776             if (b->nr == 1)
 777             {
 778                 b->index[0] = 0;
 779             }
 780             b->index[b->nr]     = b->index[b->nr-1];
 781             (*grpname)[b->nr-1] = gmx_strdup(str);
 782         }
 783         else
 784         {
 785             if (b->nr == 0)
 786             {
 787                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 788             }
 789             pt = line;
 790             while (sscanf(pt, "%s", str) == 1)
 791             {
 792                 i = b->index[b->nr];
 793                 if (i >= maxentries)
 794                 {
 795                     maxentries += 1024;
 796                     srenew(b->a, maxentries);
 797                 }
 798                 assert(b->a != NULL); // for clang analyzer
 799                 b->a[i] = strtol(str, NULL, 10)-1;
 800                 b->index[b->nr]++;
 801                 (b->nra)++;
 802                 pt = strstr(pt, str)+strlen(str);
 803             }
 804         }
 805     }
 806     gmx_fio_fclose(in);
 807
 808     for (i = 0; (i < b->nr); i++)
 809     {
 810         assert(b->a != NULL); // for clang analyzer
 811         for (j = b->index[i]; (j < b->index[i+1]); j++)
 812         {
 813             if (b->a[j] < 0)
 814             {
 815                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
 816                         b->a[j], (*grpname)[i]);
 817             }
 818         }
 819     }
 820
 821     return b;
 822 }
 823
 824 static void minstring(char *str)
 825 {
 826     int i;
 827
 828     for (i = 0; (i < (int)strlen(str)); i++)
 829     {
 830         if (str[i] == '-')
 831         {
 832             str[i] = '_';
 833         }
 834     }
 835 }
 836
 837 int find_group(char s[], int ngrps, char **grpname)
 838 {
 839     int      aa, i, n;
 840     char     string[STRLEN];
 841     gmx_bool bMultiple;
 842
 843     bMultiple = FALSE;
 844     n         = strlen(s);
 845     aa        = NOTSET;
 846     /* first look for whole name match */
 847     if (aa == NOTSET)
 848     {
 849         for (i = 0; i < ngrps; i++)
 850         {
 851             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 852             {
 853                 if (aa != NOTSET)
 854                 {
 855                     bMultiple = TRUE;
 856                 }
 857                 aa = i;
 858             }
 859         }
 860     }
 861     /* second look for first string match */
 862     if (aa == NOTSET)
 863     {
 864         for (i = 0; i < ngrps; i++)
 865         {
 866             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 867             {
 868                 if (aa != NOTSET)
 869                 {
 870                     bMultiple = TRUE;
 871                 }
 872                 aa = i;
 873             }
 874         }
 875     }
 876     /* last look for arbitrary substring match */
 877     if (aa == NOTSET)
 878     {
 879         upstring(s);
 880         minstring(s);
 881         for (i = 0; i < ngrps; i++)
 882         {
 883             strcpy(string, grpname[i]);
 884             upstring(string);
 885             minstring(string);
 886             if (strstr(string, s) != NULL)
 887             {
 888                 if (aa != NOTSET)
 889                 {
 890                     bMultiple = TRUE;
 891                 }
 892                 aa = i;
 893             }
 894         }
 895     }
 896     if (bMultiple)
 897     {
 898         printf("Error: Multiple groups '%s' selected\n", s);
 899         aa = NOTSET;
 900     }
 901     return aa;
 902 }
 903
 904 static int qgroup(int *a, int ngrps, char **grpname)
 905 {
 906     char     s[STRLEN];
 907     int      aa;
 908     gmx_bool bInRange;
 909     char    *end;
 910
 911     do
 912     {
 913         fprintf(stderr, "Select a group: ");
 914         do
 915         {
 916             if (scanf("%s", s) != 1)
 917             {
 918                 gmx_fatal(FARGS, "Cannot read from input");
 919             }
 920             trim(s); /* remove spaces */
 921         }
 922         while (strlen(s) == 0);
 923         aa = strtol(s, &end, 10);
 924         if (aa == 0 && end[0] != '\0') /* string entered */
 925         {
 926             aa = find_group(s, ngrps, grpname);
 927         }
 928         bInRange = (aa >= 0 && aa < ngrps);
 929         if (!bInRange)
 930         {
 931             printf("Error: No such group '%s'\n", s);
 932         }
 933     }
 934     while (!bInRange);
 935     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 936     *a = aa;
 937     return aa;
 938 }
 939
 940 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
 941                       int ngrps, int isize[], atom_id *index[], int grpnr[])
 942 {
 943     int i, j, gnr1;
 944
 945     if (grps->nr == 0)
 946     {
 947         gmx_fatal(FARGS, "Error: no groups in indexfile");
 948     }
 949     for (i = 0; (i < grps->nr); i++)
 950     {
 951         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
 952                 grps->index[i+1]-grps->index[i]);
 953     }
 954     for (i = 0; (i < ngrps); i++)
 955     {
 956         if (grps->nr > 1)
 957         {
 958             do
 959             {
 960                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 961                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 962                 {
 963                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
 964                 }
 965             }
 966             while ((gnr1 < 0) || (gnr1 >= grps->nr));
 967         }
 968         else
 969         {
 970             fprintf(stderr, "There is one group in the index\n");
 971             gnr1 = 0;
 972         }
 973         gnames[i] = gmx_strdup(grpname[gnr1]);
 974         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
 975         snew(index[i], isize[i]);
 976         for (j = 0; (j < isize[i]); j++)
 977         {
 978             index[i][j] = grps->a[grps->index[gnr1]+j];
 979         }
 980     }
 981 }
 982
 983 void rd_index(const char *statfile, int ngrps, int isize[],
 984               atom_id *index[], char *grpnames[])
 985 {
 986     char    **gnames;
 987     t_blocka *grps;
 988     int      *grpnr;
 989
 990     snew(grpnr, ngrps);
 991     if (!statfile)
 992     {
 993         gmx_fatal(FARGS, "No index file specified");
 994     }
 995     grps = init_index(statfile, &gnames);
 996     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
 997 }
 998
 999 void rd_index_nrs(char *statfile, int ngrps, int isize[],
1000                   atom_id *index[], char *grpnames[], int grpnr[])
1001 {
1002     char    **gnames;
1003     t_blocka *grps;
1004
1005     if (!statfile)
1006     {
1007         gmx_fatal(FARGS, "No index file specified");
1008     }
1009     grps = init_index(statfile, &gnames);
1010
1011     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
1012 }
1013
1014 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1015                int isize[], atom_id *index[], char *grpnames[])
1016 {
1017     char    ***gnames;
1018     t_blocka  *grps = NULL;
1019     int       *grpnr;
1020
1021     snew(grpnr, ngrps);
1022     snew(gnames, 1);
1023     if (fnm != NULL)
1024     {
1025         grps = init_index(fnm, gnames);
1026     }
1027     else if (atoms)
1028     {
1029         snew(grps, 1);
1030         snew(grps->index, 1);
1031         analyse(atoms, grps, gnames, FALSE, FALSE);
1032     }
1033     else
1034     {
1035         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1036     }
1037
1038     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1039 }
1040
1041 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1042 {
1043     t_cluster_ndx *c;
1044     int            i;
1045
1046     snew(c, 1);
1047     c->clust     = init_index(ndx, &c->grpname);
1048     c->maxframe  = -1;
1049     for (i = 0; (i < c->clust->nra); i++)
1050     {
1051         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1052     }
1053     fprintf(fplog ? fplog : stdout,
1054             "There are %d clusters containing %d structures, highest framenr is %d\n",
1055             c->clust->nr, c->clust->nra, c->maxframe);
1056     if (debug)
1057     {
1058         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1059         for (i = 0; (i < c->clust->nra); i++)
1060         {
1061             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1062             {
1063                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1064                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1065             }
1066         }
1067     }
1068     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1069
1070     return c;
1071 }