src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "index.h"
  40
  41 #include <cassert>
  42 #include <cctype>
  43 #include <cstdlib>
  44 #include <cstring>
  45
  46 #include <algorithm>
  47
  48 #include "gromacs/topology/atoms.h"
  49 #include "gromacs/topology/block.h"
  50 #include "gromacs/topology/invblock.h"
  51 #include "gromacs/topology/residuetypes.h"
  52 #include "gromacs/utility/arraysize.h"
  53 #include "gromacs/utility/cstringutil.h"
  54 #include "gromacs/utility/fatalerror.h"
  55 #include "gromacs/utility/futil.h"
  56 #include "gromacs/utility/smalloc.h"
  57 #include "gromacs/utility/strdb.h"
  58
  59 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  60 {
  61     char c;
  62
  63     if (bASK)
  64     {
  65         do
  66         {
  67             c = toupper(fgetc(stdin));
  68         }
  69         while ((c != 'Y') && (c != 'N'));
  70
  71         return (c == 'Y');
  72     }
  73     else
  74     {
  75         return FALSE;
  76     }
  77 }
  78
  79 void write_index(const char *outf, t_blocka *b, char **gnames, gmx_bool bDuplicate, int natoms)
  80 {
  81     FILE *out;
  82     int   i, j, k;
  83
  84     out = gmx_ffopen(outf, "w");
  85     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  86     for (i = 0; (i < b->nr); i++)
  87     {
  88         fprintf(out, "[ %s ]", gnames[i]);
  89         for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
  90         {
  91             const char sep = (k % 15 == 0 ? '\n' : ' ');
  92             fprintf(out, "%c%4d", sep, b->a[j]+1);
  93         }
  94         fprintf(out, "\n");
  95     }
  96
  97     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
  98     if (bDuplicate)
  99     {
 100         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 101         for (i = 0; (i < b->nr); i++)
 102         {
 103             fprintf(out, "[ %s_copy ]", gnames[i]);
 104             for (k = 0, j = b->index[i]; j < b->index[i+1]; j++, k++)
 105             {
 106                 const char sep = (k % 15 == 0 ? '\n' : ' ');
 107                 fprintf(out, "%c%4d", sep, b->a[j]+1 + natoms);
 108             }
 109             fprintf(out, "\n");
 110         }
 111     }
 112
 113     gmx_ffclose(out);
 114 }
 115
 116 void add_grp(t_blocka *b, char ***gnames, gmx::ArrayRef<const int> a, const std::string &name)
 117 {
 118     srenew(b->index, b->nr+2);
 119     srenew(*gnames, b->nr+1);
 120     (*gnames)[b->nr] = gmx_strdup(name.c_str());
 121
 122     srenew(b->a, b->nra+a.size());
 123     for (int i = 0; (i < a.ssize()); i++)
 124     {
 125         b->a[b->nra++] = a[i];
 126     }
 127     b->nr++;
 128     b->index[b->nr] = b->nra;
 129 }
 130
 131 /*! \brief
 132  * Compare index in groups.
 133  *
 134  * Checks the index in \p a to the one in \p b at \p index.
 135  * If \p index is < 0, it is taken as relative to the end of \p b.
 136  *
 137  * \param[in] b Block with groups.
 138  * \param[in] a New group to compare to.
 139  * \param[in] index The index to check.
 140  * \returns True if groups are the same.
 141  */
 142 static bool grp_cmp(t_blocka *b, gmx::ArrayRef<const int> a, int index)
 143 {
 144     if (index < 0)
 145     {
 146         index = b->nr-1+index;
 147     }
 148     if (index >= b->nr)
 149     {
 150         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 151     }
 152     /* compare sizes */
 153     if (a.ssize() != b->index[index+1] - b->index[index])
 154     {
 155         return FALSE;
 156     }
 157     for (int i = 0; i < a.ssize(); i++)
 158     {
 159         if (a[i] != b->a[b->index[index]+i])
 160         {
 161             return false;
 162         }
 163     }
 164     return true;
 165 }
 166 //! Print out how many residues of a certain type are present.
 167 static void p_status(gmx::ArrayRef<const std::string> restype,
 168                      gmx::ArrayRef<const std::string> typenames)
 169 {
 170     std::vector<int> counter(typenames.size(), 0);
 171     std::transform(typenames.begin(), typenames.end(), counter.begin(),
 172                    [&restype](const std::string &typenm) {
 173                        return std::count_if(restype.begin(), restype.end(),
 174                                             [&typenm](const std::string &res) {
 175                                                 return gmx_strcasecmp(res.c_str(), typenm.c_str()) == 0;
 176                                             }
 177                                             );
 178                    }
 179                    );
 180
 181     for (int i = 0; (i < typenames.ssize()); i++)
 182     {
 183         if (counter[i] > 0)
 184         {
 185             printf("There are: %5d %10s residues\n", counter[i], typenames[i].c_str());
 186         }
 187     }
 188 }
 189
 190 /*! \brief
 191  * Return a new group of atoms with \p restype that match \p typestring,
 192  * or not matching if \p bMatch.
 193  *
 194  * \param[in] atoms Atoms data to use.
 195  * \param[in] restype Residuetypes to match.
 196  * \param[in] typestring Which type the residue should have.
 197  * \param[in] bMatch whether to return matching atoms or those that don't.
 198  * \returns Vector of atoms that match.
 199  */
 200 static std::vector<int> mk_aid(const t_atoms                   *atoms,
 201                                gmx::ArrayRef<const std::string> restype,
 202                                const std::string               &typestring,
 203                                bool                             bMatch)
 204 {
 205     std::vector<int> a;
 206     for (int i = 0; (i < atoms->nr); i++)
 207     {
 208         bool res = gmx_strcasecmp(restype[atoms->atom[i].resind].c_str(), typestring.c_str()) == 0;
 209         if (!bMatch)
 210         {
 211             res = !res;
 212         }
 213         if (res)
 214         {
 215             a.push_back(i);
 216         }
 217     }
 218
 219     return a;
 220 }
 221
 222 typedef struct {
 223     char    *rname;
 224     gmx_bool bNeg;
 225     char    *gname;
 226 } restp_t;
 227
 228 static void analyse_other(gmx::ArrayRef<std::string> restype, const t_atoms *atoms,
 229                           t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 230 {
 231     restp_t *restp = nullptr;
 232     char   **attp  = nullptr;
 233     char    *rname, *aname;
 234     int      i, resind, natp, nrestp = 0;
 235
 236     for (i = 0; (i < atoms->nres); i++)
 237     {
 238         if (gmx_strcasecmp(restype[i].c_str(), "Protein") && gmx_strcasecmp(restype[i].c_str(), "DNA") && gmx_strcasecmp(restype[i].c_str(), "RNA") && gmx_strcasecmp(restype[i].c_str(), "Water"))
 239         {
 240             break;
 241         }
 242     }
 243     if (i < atoms->nres)
 244     {
 245         /* we have others */
 246         if (bVerb)
 247         {
 248             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 249         }
 250         for (int k = 0; (k < atoms->nr); k++)
 251         {
 252             resind = atoms->atom[k].resind;
 253             rname  = *atoms->resinfo[resind].name;
 254             if (gmx_strcasecmp(restype[resind].c_str(), "Protein") && gmx_strcasecmp(restype[resind].c_str(), "DNA") &&
 255                 gmx_strcasecmp(restype[resind].c_str(), "RNA") && gmx_strcasecmp(restype[resind].c_str(), "Water"))
 256             {
 257                 int l;
 258                 for (l = 0; (l < nrestp); l++)
 259                 {
 260                     assert(restp);
 261                     if (strcmp(restp[l].rname, rname) == 0)
 262                     {
 263                         break;
 264                     }
 265                 }
 266                 if (l == nrestp)
 267                 {
 268                     srenew(restp, nrestp+1);
 269                     restp[nrestp].rname = gmx_strdup(rname);
 270                     restp[nrestp].bNeg  = FALSE;
 271                     restp[nrestp].gname = gmx_strdup(rname);
 272                     nrestp++;
 273                 }
 274             }
 275         }
 276         for (int i = 0; (i < nrestp); i++)
 277         {
 278             std::vector<int> aid;
 279             for (int j = 0; (j < atoms->nr); j++)
 280             {
 281                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 282                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg) ||
 283                     (strcmp(restp[i].rname, rname) != 0 &&  restp[i].bNeg))
 284                 {
 285                     aid.push_back(j);
 286                 }
 287             }
 288             add_grp(gb, gn, aid, restp[i].gname);
 289             if (bASK)
 290             {
 291                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 292                 fflush(stdout);
 293                 if (gmx_ask_yesno(bASK))
 294                 {
 295                     natp = 0;
 296                     for (size_t k = 0; (k < aid.size()); k++)
 297                     {
 298                         aname = *atoms->atomname[aid[k]];
 299                         int l;
 300                         for (l = 0; (l < natp); l++)
 301                         {
 302                             if (strcmp(aname, attp[l]) == 0)
 303                             {
 304                                 break;
 305                             }
 306                         }
 307                         if (l == natp)
 308                         {
 309                             srenew(attp, ++natp);
 310                             attp[natp-1] = aname;
 311                         }
 312                     }
 313                     if (natp > 1)
 314                     {
 315                         for (int l = 0; (l < natp); l++)
 316                         {
 317                             std::vector<int> aaid;
 318                             for (size_t k = 0; (k < aid.size()); k++)
 319                             {
 320                                 aname = *atoms->atomname[aid[k]];
 321                                 if (strcmp(aname, attp[l]) == 0)
 322                                 {
 323                                     aaid.push_back(aid[k]);
 324                                 }
 325                             }
 326                             add_grp(gb, gn, aaid, attp[l]);
 327                         }
 328                     }
 329                     sfree(attp);
 330                     attp = nullptr;
 331                 }
 332             }
 333             sfree(restp[i].rname);
 334             sfree(restp[i].gname);
 335         }
 336         sfree(restp);
 337     }
 338 }
 339
 340 /*! \brief
 341  * Data necessary to construct a single (protein) index group in
 342  * analyse_prot().
 343  */
 344 typedef struct gmx_help_make_index_group // NOLINT(clang-analyzer-optin.performance.Padding)
 345 {
 346     /** The set of atom names that will be used to form this index group */
 347     const char **defining_atomnames;
 348     /** Size of the defining_atomnames array */
 349     int          num_defining_atomnames;
 350     /** Name of this index group */
 351     const char  *group_name;
 352     /** Whether the above atom names name the atoms in the group, or
 353         those not in the group */
 354     gmx_bool bTakeComplement;
 355     /** The index in wholename gives the first item in the arrays of
 356        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 357        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 358        This is comparable to using a '*' wildcard at the end of specific
 359        atom names, but that is more involved to implement...
 360      */
 361     int wholename;
 362     /** Only create this index group if it differs from the one specified in compareto,
 363        where -1 means to always create this group. */
 364     int compareto;
 365 } t_gmx_help_make_index_group;
 366
 367 static void analyse_prot(gmx::ArrayRef<const std::string> restype, const t_atoms *atoms,
 368                          t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 369 {
 370     /* lists of atomnames to be used in constructing index groups: */
 371     static const char *pnoh[]    = { "H", "HN" };
 372     static const char *pnodum[]  = {
 373         "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 374         "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2"
 375     };
 376     static const char *calpha[]  = { "CA" };
 377     static const char *bb[]      = { "N", "CA", "C" };
 378     static const char *mc[]      = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 379     static const char *mcb[]     = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 380     static const char *mch[]     = {
 381         "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT",
 382         "H1", "H2", "H3", "H", "HN"
 383     };
 384
 385     static const t_gmx_help_make_index_group constructing_data[] =
 386     {{ nullptr,   0, "Protein",      TRUE,  -1, -1},
 387      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 388      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 389      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 390      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 391      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 392      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 393      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 394      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 395      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0}, };
 396     const int   num_index_groups = asize(constructing_data);
 397
 398     int         n, j;
 399     int         npres;
 400     gmx_bool    match;
 401     char        ndx_name[STRLEN], *atnm;
 402     int         i;
 403
 404     if (bVerb)
 405     {
 406         printf("Analysing Protein...\n");
 407     }
 408     std::vector<int> aid;
 409
 410     /* calculate the number of protein residues */
 411     npres = 0;
 412     for (i = 0; (i < atoms->nres); i++)
 413     {
 414         if (0 == gmx_strcasecmp(restype[i].c_str(), "Protein"))
 415         {
 416             npres++;
 417         }
 418     }
 419     /* find matching or complement atoms */
 420     for (i = 0; (i < num_index_groups); i++)
 421     {
 422         for (n = 0; (n < atoms->nr); n++)
 423         {
 424             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind].c_str(), "Protein"))
 425             {
 426                 match = FALSE;
 427                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 428                 {
 429                     /* skip digits at beginning of atomname, e.g. 1H */
 430                     atnm = *atoms->atomname[n];
 431                     while (isdigit(atnm[0]))
 432                     {
 433                         atnm++;
 434                     }
 435                     if ( (constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename) )
 436                     {
 437                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 438                         {
 439                             match = TRUE;
 440                         }
 441                     }
 442                     else
 443                     {
 444                         if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm, strlen(constructing_data[i].defining_atomnames[j])))
 445                         {
 446                             match = TRUE;
 447                         }
 448                     }
 449                 }
 450                 if (constructing_data[i].bTakeComplement != match)
 451                 {
 452                     aid.push_back(n);
 453                 }
 454             }
 455         }
 456         /* if we want to add this group always or it differs from previous
 457            group, add it: */
 458         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, aid, constructing_data[i].compareto-i) )
 459         {
 460             add_grp(gb, gn, aid, constructing_data[i].group_name);
 461         }
 462         aid.clear();
 463     }
 464
 465     if (bASK)
 466     {
 467         for (i = 0; (i < num_index_groups); i++)
 468         {
 469             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 470             if (gmx_ask_yesno(bASK))
 471             {
 472                 int resind;
 473                 aid.clear();
 474                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 475                 {
 476                     resind = atoms->atom[n].resind;
 477                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 478                     {
 479                         match = FALSE;
 480                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 481                         {
 482                             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], *atoms->atomname[n]))
 483                             {
 484                                 match = TRUE;
 485                             }
 486                         }
 487                         if (constructing_data[i].bTakeComplement != match)
 488                         {
 489                             aid.push_back(n);
 490                         }
 491                     }
 492                     /* copy the residuename to the tail of the groupname */
 493                     if (!aid.empty())
 494                     {
 495                         t_resinfo *ri;
 496                         ri = &atoms->resinfo[resind];
 497                         sprintf(ndx_name, "%s_%s%d%c",
 498                                 constructing_data[i].group_name, *ri->name, ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 499                         add_grp(gb, gn, aid, ndx_name);
 500                         aid.clear();
 501                     }
 502                 }
 503             }
 504         }
 505         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 506         if (gmx_ask_yesno(bASK))
 507         {
 508             /* Make swap sidechain C=O index */
 509             aid.clear();
 510             for (int n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr)); )
 511             {
 512                 int resind = atoms->atom[n].resind;
 513                 int hold   = -1;
 514                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 515                 {
 516                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 517                     {
 518                         aid.push_back(n);
 519                         hold = aid.size();
 520                         aid.resize(aid.size() + 3);
 521                     }
 522                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 523                     {
 524                         if (hold == -1)
 525                         {
 526                             gmx_incons("Atom naming problem");
 527                         }
 528                         aid[hold] = n;
 529                     }
 530                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 531                     {
 532                         if (hold == -1)
 533                         {
 534                             gmx_incons("Atom naming problem");
 535                         }
 536                         aid[hold+1] = n;
 537                     }
 538                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 539                     {
 540                         if (hold == -1)
 541                         {
 542                             gmx_incons("Atom naming problem");
 543                         }
 544                         aid[hold+1] = n;
 545                     }
 546                     else
 547                     {
 548                         aid.push_back(n);
 549                     }
 550                 }
 551             }
 552             /* copy the residuename to the tail of the groupname */
 553             if (!aid.empty())
 554             {
 555                 add_grp(gb, gn, aid, "SwapSC-CO");
 556             }
 557         }
 558     }
 559 }
 560
 561
 562 void analyse(const t_atoms *atoms, t_blocka *gb, char ***gn, gmx_bool bASK, gmx_bool bVerb)
 563 {
 564     char             *resnm;
 565     int               i;
 566     int               iwater, iion;
 567     int               nwater, nion;
 568
 569     if (bVerb)
 570     {
 571         printf("Analysing residue names:\n");
 572     }
 573     /* Create system group, every single atom */
 574     std::vector<int> aid(atoms->nr);
 575     for (i = 0; i < atoms->nr; i++)
 576     {
 577         aid[i] = i;
 578     }
 579     add_grp(gb, gn, aid, "System");
 580
 581     /* For every residue, get a pointer to the residue type name */
 582     ResidueType              rt;
 583
 584     std::vector<std::string> restype;
 585     std::vector<std::string> previousTypename;
 586     if (atoms->nres > 0)
 587     {
 588         int i = 0;
 589
 590         resnm = *atoms->resinfo[i].name;
 591         restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
 592         previousTypename.push_back(restype.back());
 593
 594         for (i = 1; i < atoms->nres; i++)
 595         {
 596             resnm = *atoms->resinfo[i].name;
 597             restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
 598
 599             /* Note that this does not lead to a N*N loop, but N*K, where
 600              * K is the number of residue _types_, which is small and independent of N.
 601              */
 602             bool found = false;
 603             for (size_t k = 0; k < previousTypename.size() && !found; k++)
 604             {
 605                 found = strcmp(restype.back().c_str(), previousTypename[k].c_str()) == 0;
 606             }
 607             if (!found)
 608             {
 609                 previousTypename.push_back(restype.back());
 610             }
 611         }
 612     }
 613
 614     if (bVerb)
 615     {
 616         p_status(restype, previousTypename);
 617     }
 618
 619     for (int k = 0; k < gmx::index(previousTypename.size()); k++)
 620     {
 621         aid = mk_aid(atoms, restype, previousTypename[k], TRUE);
 622
 623         /* Check for special types to do fancy stuff with */
 624
 625         if (!gmx_strcasecmp(previousTypename[k].c_str(), "Protein") && !aid.empty())
 626         {
 627             /* PROTEIN */
 628             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 629
 630             /* Create a Non-Protein group */
 631             aid = mk_aid(atoms, restype, "Protein", FALSE);
 632             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 633             {
 634                 add_grp(gb, gn, aid, "non-Protein");
 635             }
 636         }
 637         else if (!gmx_strcasecmp(previousTypename[k].c_str(), "Water") && !aid.empty())
 638         {
 639             add_grp(gb, gn, aid, previousTypename[k]);
 640             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 641             add_grp(gb, gn, aid, "SOL");
 642
 643
 644             /* Solvent, create a negated group too */
 645             aid = mk_aid(atoms, restype, "Water", FALSE);
 646             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 647             {
 648                 add_grp(gb, gn, aid, "non-Water");
 649             }
 650         }
 651         else if (!aid.empty())
 652         {
 653             /* Other groups */
 654             add_grp(gb, gn, aid, previousTypename[k]);
 655             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 656         }
 657     }
 658
 659
 660     /* Create a merged water_and_ions group */
 661     iwater = -1;
 662     iion   = -1;
 663     nwater = 0;
 664     nion   = 0;
 665
 666     for (i = 0; i < gb->nr; i++)
 667     {
 668         if (!gmx_strcasecmp((*gn)[i], "Water"))
 669         {
 670             iwater = i;
 671             nwater = gb->index[i+1]-gb->index[i];
 672         }
 673         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 674         {
 675             iion = i;
 676             nion = gb->index[i+1]-gb->index[i];
 677         }
 678     }
 679
 680     if (nwater > 0 && nion > 0)
 681     {
 682         srenew(gb->index, gb->nr+2);
 683         srenew(*gn, gb->nr+1);
 684         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 685         srenew(gb->a, gb->nra+nwater+nion);
 686         if (nwater > 0)
 687         {
 688             for (i = gb->index[iwater]; i < gb->index[iwater+1]; i++)
 689             {
 690                 gb->a[gb->nra++] = gb->a[i];
 691             }
 692         }
 693         if (nion > 0)
 694         {
 695             for (i = gb->index[iion]; i < gb->index[iion+1]; i++)
 696             {
 697                 gb->a[gb->nra++] = gb->a[i];
 698             }
 699         }
 700         gb->nr++;
 701         gb->index[gb->nr] = gb->nra;
 702     }
 703 }
 704
 705
 706 void check_index(const char *gname, int n, int index[], const char *traj, int natoms)
 707 {
 708     int i;
 709
 710     for (i = 0; i < n; i++)
 711     {
 712         if (index[i] >= natoms)
 713         {
 714             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 715                       gname ? gname : "Index", i+1, index[i]+1,
 716                       traj ? traj : "the trajectory", natoms);
 717         }
 718         else if (index[i] < 0)
 719         {
 720             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 721                       gname ? gname : "Index", i+1, index[i]+1);
 722         }
 723     }
 724 }
 725
 726 t_blocka *init_index(const char *gfile, char ***grpname)
 727 {
 728     FILE      *in;
 729     t_blocka  *b;
 730     int        maxentries;
 731     int        i, j;
 732     char       line[STRLEN], *pt, str[STRLEN];
 733
 734     in = gmx_ffopen(gfile, "r");
 735     snew(b, 1);
 736     b->nr      = 0;
 737     b->index   = nullptr;
 738     b->nra     = 0;
 739     b->a       = nullptr;
 740     *grpname   = nullptr;
 741     maxentries = 0;
 742     while (get_a_line(in, line, STRLEN))
 743     {
 744         if (get_header(line, str))
 745         {
 746             b->nr++;
 747             srenew(b->index, b->nr+1);
 748             srenew(*grpname, b->nr);
 749             if (b->nr == 1)
 750             {
 751                 b->index[0] = 0;
 752             }
 753             b->index[b->nr]     = b->index[b->nr-1];
 754             (*grpname)[b->nr-1] = gmx_strdup(str);
 755         }
 756         else
 757         {
 758             if (b->nr == 0)
 759             {
 760                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 761             }
 762             pt = line;
 763             while (sscanf(pt, "%s", str) == 1)
 764             {
 765                 i = b->index[b->nr];
 766                 if (i >= maxentries)
 767                 {
 768                     maxentries += 1024;
 769                     srenew(b->a, maxentries);
 770                 }
 771                 assert(b->a != nullptr); // for clang analyzer
 772                 b->a[i] = strtol(str, nullptr, 10)-1;
 773                 b->index[b->nr]++;
 774                 (b->nra)++;
 775                 pt = strstr(pt, str)+strlen(str);
 776             }
 777         }
 778     }
 779     gmx_ffclose(in);
 780
 781     for (i = 0; (i < b->nr); i++)
 782     {
 783         assert(b->a != nullptr); // for clang analyzer
 784         for (j = b->index[i]; (j < b->index[i+1]); j++)
 785         {
 786             if (b->a[j] < 0)
 787             {
 788                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n",
 789                         b->a[j], (*grpname)[i]);
 790             }
 791         }
 792     }
 793
 794     return b;
 795 }
 796
 797 static void minstring(char *str)
 798 {
 799     int i;
 800
 801     for (i = 0; (i < static_cast<int>(strlen(str))); i++)
 802     {
 803         if (str[i] == '-')
 804         {
 805             str[i] = '_';
 806         }
 807     }
 808 }
 809
 810 int find_group(const char *s, int ngrps, char **grpname)
 811 {
 812     int      aa, i, n;
 813     char     string[STRLEN];
 814     gmx_bool bMultiple;
 815     bMultiple = FALSE;
 816     n         = strlen(s);
 817     aa        = -1;
 818     /* first look for whole name match */
 819     {
 820         for (i = 0; i < ngrps; i++)
 821         {
 822             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 823             {
 824                 if (aa != -1)
 825                 {
 826                     bMultiple = TRUE;
 827                 }
 828                 aa = i;
 829             }
 830         }
 831     }
 832     /* second look for first string match */
 833     if (aa == -1)
 834     {
 835         for (i = 0; i < ngrps; i++)
 836         {
 837             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 838             {
 839                 if (aa != -1)
 840                 {
 841                     bMultiple = TRUE;
 842                 }
 843                 aa = i;
 844             }
 845         }
 846     }
 847     /* last look for arbitrary substring match */
 848     if (aa == -1)
 849     {
 850         char key[STRLEN];
 851         strncpy(key, s, sizeof(key)-1);
 852         key[STRLEN-1] = '\0';
 853         upstring(key);
 854         minstring(key);
 855         for (i = 0; i < ngrps; i++)
 856         {
 857             strcpy(string, grpname[i]);
 858             upstring(string);
 859             minstring(string);
 860             if (strstr(string, key) != nullptr)
 861             {
 862                 if (aa != -1)
 863                 {
 864                     bMultiple = TRUE;
 865                 }
 866                 aa = i;
 867             }
 868         }
 869     }
 870     if (bMultiple)
 871     {
 872         printf("Error: Multiple groups '%s' selected\n", s);
 873         aa = -1;
 874     }
 875     return aa;
 876 }
 877
 878 static int qgroup(int *a, int ngrps, char **grpname)
 879 {
 880     char     s[STRLEN];
 881     int      aa;
 882     gmx_bool bInRange;
 883     char    *end;
 884
 885     do
 886     {
 887         fprintf(stderr, "Select a group: ");
 888         do
 889         {
 890             if (scanf("%s", s) != 1)
 891             {
 892                 gmx_fatal(FARGS, "Cannot read from input");
 893             }
 894             trim(s); /* remove spaces */
 895         }
 896         while (strlen(s) == 0);
 897         aa = strtol(s, &end, 10);
 898         if (aa == 0 && end[0] != '\0') /* string entered */
 899         {
 900             aa = find_group(s, ngrps, grpname);
 901         }
 902         bInRange = (aa >= 0 && aa < ngrps);
 903         if (!bInRange)
 904         {
 905             printf("Error: No such group '%s'\n", s);
 906         }
 907     }
 908     while (!bInRange);
 909     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 910     *a = aa;
 911     return aa;
 912 }
 913
 914 static void rd_groups(t_blocka *grps, char **grpname, char *gnames[],
 915                       int ngrps, int isize[], int *index[], int grpnr[])
 916 {
 917     int i, j, gnr1;
 918
 919     if (grps->nr == 0)
 920     {
 921         gmx_fatal(FARGS, "Error: no groups in indexfile");
 922     }
 923     for (i = 0; (i < grps->nr); i++)
 924     {
 925         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
 926                 grps->index[i+1]-grps->index[i]);
 927     }
 928     for (i = 0; (i < ngrps); i++)
 929     {
 930         if (grps->nr > 1)
 931         {
 932             do
 933             {
 934                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 935                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 936                 {
 937                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr-1);
 938                 }
 939             }
 940             while ((gnr1 < 0) || (gnr1 >= grps->nr));
 941         }
 942         else
 943         {
 944             fprintf(stderr, "There is one group in the index\n");
 945             gnr1 = 0;
 946         }
 947         gnames[i] = gmx_strdup(grpname[gnr1]);
 948         isize[i]  = grps->index[gnr1+1]-grps->index[gnr1];
 949         snew(index[i], isize[i]);
 950         for (j = 0; (j < isize[i]); j++)
 951         {
 952             index[i][j] = grps->a[grps->index[gnr1]+j];
 953         }
 954     }
 955 }
 956
 957 void rd_index(const char *statfile, int ngrps, int isize[],
 958               int *index[], char *grpnames[])
 959 {
 960     char    **gnames;
 961     t_blocka *grps;
 962     int      *grpnr;
 963
 964     snew(grpnr, ngrps);
 965     if (!statfile)
 966     {
 967         gmx_fatal(FARGS, "No index file specified");
 968     }
 969     grps = init_index(statfile, &gnames);
 970     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
 971     for (int i = 0; i < grps->nr; i++)
 972     {
 973         sfree(gnames[i]);
 974     }
 975     sfree(gnames);
 976     sfree(grpnr);
 977     done_blocka(grps);
 978     sfree(grps);
 979 }
 980
 981 void get_index(const t_atoms *atoms, const char *fnm, int ngrps,
 982                int isize[], int *index[], char *grpnames[])
 983 {
 984     char    ***gnames;
 985     t_blocka  *grps = nullptr;
 986     int       *grpnr;
 987
 988     snew(grpnr, ngrps);
 989     snew(gnames, 1);
 990     if (fnm != nullptr)
 991     {
 992         grps = init_index(fnm, gnames);
 993     }
 994     else if (atoms)
 995     {
 996         snew(grps, 1);
 997         snew(grps->index, 1);
 998         analyse(atoms, grps, gnames, FALSE, FALSE);
 999     }
1000     else
1001     {
1002         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1003     }
1004
1005     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1006     for (int i = 0; i < grps->nr; ++i)
1007     {
1008         sfree((*gnames)[i]);
1009     }
1010     sfree(*gnames);
1011     sfree(gnames);
1012     sfree(grpnr);
1013     done_blocka(grps);
1014     sfree(grps);
1015 }
1016
1017 t_cluster_ndx *cluster_index(FILE *fplog, const char *ndx)
1018 {
1019     t_cluster_ndx *c;
1020     int            i;
1021
1022     snew(c, 1);
1023     c->clust     = init_index(ndx, &c->grpname);
1024     c->maxframe  = -1;
1025     for (i = 0; (i < c->clust->nra); i++)
1026     {
1027         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1028     }
1029     fprintf(fplog ? fplog : stdout,
1030             "There are %d clusters containing %d structures, highest framenr is %d\n",
1031             c->clust->nr, c->clust->nra, c->maxframe);
1032     if (debug)
1033     {
1034         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1035         for (i = 0; (i < c->clust->nra); i++)
1036         {
1037             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1038             {
1039                 gmx_fatal(FARGS, "Range check error for c->clust->a[%d] = %d\n"
1040                           "should be within 0 and %d", i, c->clust->a[i], c->maxframe+1);
1041             }
1042         }
1043     }
1044     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1045
1046     return c;
1047 }