src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "index.h"
  41
  42 #include <cassert>
  43 #include <cctype>
  44 #include <cstdlib>
  45 #include <cstring>
  46
  47 #include <algorithm>
  48 #include <numeric>
  49
  50 #include "gromacs/topology/atoms.h"
  51 #include "gromacs/topology/block.h"
  52 #include "gromacs/topology/invblock.h"
  53 #include "gromacs/topology/residuetypes.h"
  54 #include "gromacs/utility/arraysize.h"
  55 #include "gromacs/utility/cstringutil.h"
  56 #include "gromacs/utility/fatalerror.h"
  57 #include "gromacs/utility/futil.h"
  58 #include "gromacs/utility/smalloc.h"
  59 #include "gromacs/utility/strdb.h"
  60
  61 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  62 {
  63     char c = 0;
  64
  65     if (bASK)
  66     {
  67         do
  68         {
  69             c = toupper(fgetc(stdin));
  70         } while ((c != 'Y') && (c != 'N'));
  71
  72         return (c == 'Y');
  73     }
  74     else
  75     {
  76         return FALSE;
  77     }
  78 }
  79
  80 void write_index(const char* outf, t_blocka* b, char** gnames, gmx_bool bDuplicate, int natoms)
  81 {
  82     FILE* out = gmx_ffopen(outf, "w");
  83     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  84     for (int i = 0; (i < b->nr); i++)
  85     {
  86         fprintf(out, "[ %s ]", gnames[i]);
  87         for (int k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
  88         {
  89             const char sep = (k % 15 == 0 ? '\n' : ' ');
  90             fprintf(out, "%c%4d", sep, b->a[j] + 1);
  91         }
  92         fprintf(out, "\n");
  93     }
  94
  95     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
  96     if (bDuplicate)
  97     {
  98         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
  99         for (int i = 0; (i < b->nr); i++)
 100         {
 101             fprintf(out, "[ %s_copy ]", gnames[i]);
 102             for (int k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
 103             {
 104                 const char sep = (k % 15 == 0 ? '\n' : ' ');
 105                 fprintf(out, "%c%4d", sep, b->a[j] + 1 + natoms);
 106             }
 107             fprintf(out, "\n");
 108         }
 109     }
 110
 111     gmx_ffclose(out);
 112 }
 113
 114 void add_grp(t_blocka* b, char*** gnames, gmx::ArrayRef<const int> a, const std::string& name)
 115 {
 116     srenew(b->index, b->nr + 2);
 117     srenew(*gnames, b->nr + 1);
 118     (*gnames)[b->nr] = gmx_strdup(name.c_str());
 119
 120     srenew(b->a, b->nra + a.size());
 121     for (gmx::index i = 0; (i < a.ssize()); i++)
 122     {
 123         b->a[b->nra++] = a[i];
 124     }
 125     b->nr++;
 126     b->index[b->nr] = b->nra;
 127 }
 128
 129 /*! \brief
 130  * Compare index in groups.
 131  *
 132  * Checks the index in \p a to the one in \p b at \p index.
 133  * If \p index is < 0, it is taken as relative to the end of \p b.
 134  *
 135  * \param[in] b Block with groups.
 136  * \param[in] a New group to compare to.
 137  * \param[in] index The index to check.
 138  * \returns True if groups are the same.
 139  */
 140 static bool grp_cmp(t_blocka* b, gmx::ArrayRef<const int> a, int index)
 141 {
 142     if (index < 0)
 143     {
 144         index = b->nr - 1 + index;
 145     }
 146     if (index >= b->nr)
 147     {
 148         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 149     }
 150     /* compare sizes */
 151     if (a.ssize() != b->index[index + 1] - b->index[index])
 152     {
 153         return FALSE;
 154     }
 155     for (gmx::index i = 0; i < a.ssize(); i++)
 156     {
 157         if (a[i] != b->a[b->index[index] + i])
 158         {
 159             return false;
 160         }
 161     }
 162     return true;
 163 }
 164 //! Print out how many residues of a certain type are present.
 165 static void p_status(gmx::ArrayRef<const std::string> restype, gmx::ArrayRef<const std::string> typenames)
 166 {
 167     std::vector<int> counter(typenames.size(), 0);
 168     std::transform(typenames.begin(), typenames.end(), counter.begin(), [&restype](const std::string& typenm) {
 169         return std::count_if(restype.begin(), restype.end(), [&typenm](const std::string& res) {
 170             return gmx_strcasecmp(res.c_str(), typenm.c_str()) == 0;
 171         });
 172     });
 173
 174     for (int i = 0; (i < typenames.ssize()); i++)
 175     {
 176         if (counter[i] > 0)
 177         {
 178             printf("There are: %5d %10s residues\n", counter[i], typenames[i].c_str());
 179         }
 180     }
 181 }
 182
 183 /*! \brief
 184  * Return a new group of atoms with \p restype that match \p typestring,
 185  * or not matching if \p bMatch.
 186  *
 187  * \param[in] atoms Atoms data to use.
 188  * \param[in] restype Residuetypes to match.
 189  * \param[in] typestring Which type the residue should have.
 190  * \param[in] bMatch whether to return matching atoms or those that don't.
 191  * \returns Vector of atoms that match.
 192  */
 193 static std::vector<int> mk_aid(const t_atoms*                   atoms,
 194                                gmx::ArrayRef<const std::string> restype,
 195                                const std::string&               typestring,
 196                                bool                             bMatch)
 197 {
 198     std::vector<int> a;
 199     for (int i = 0; (i < atoms->nr); i++)
 200     {
 201         bool res = gmx_strcasecmp(restype[atoms->atom[i].resind].c_str(), typestring.c_str()) == 0;
 202         if (!bMatch)
 203         {
 204             res = !res;
 205         }
 206         if (res)
 207         {
 208             a.push_back(i);
 209         }
 210     }
 211
 212     return a;
 213 }
 214
 215 typedef struct
 216 {
 217     char*    rname;
 218     gmx_bool bNeg;
 219     char*    gname;
 220 } restp_t;
 221
 222 static void analyse_other(gmx::ArrayRef<std::string> restype,
 223                           const t_atoms*             atoms,
 224                           t_blocka*                  gb,
 225                           char***                    gn,
 226                           gmx_bool                   bASK,
 227                           gmx_bool                   bVerb)
 228 {
 229     std::vector<restp_t> restp;
 230     int                  i = 0;
 231
 232     for (; (i < atoms->nres); i++)
 233     {
 234         if (gmx_strcasecmp(restype[i].c_str(), "Protein")
 235             && gmx_strcasecmp(restype[i].c_str(), "DNA") && gmx_strcasecmp(restype[i].c_str(), "RNA")
 236             && gmx_strcasecmp(restype[i].c_str(), "Water"))
 237         {
 238             break;
 239         }
 240     }
 241     if (i < atoms->nres)
 242     {
 243         /* we have others */
 244         if (bVerb)
 245         {
 246             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into "
 247                    "groups...\n");
 248         }
 249         for (int k = 0; (k < atoms->nr); k++)
 250         {
 251             int         resind = atoms->atom[k].resind;
 252             const char* rname  = *atoms->resinfo[resind].name;
 253             if (gmx_strcasecmp(restype[resind].c_str(), "Protein")
 254                 && gmx_strcasecmp(restype[resind].c_str(), "DNA")
 255                 && gmx_strcasecmp(restype[resind].c_str(), "RNA")
 256                 && gmx_strcasecmp(restype[resind].c_str(), "Water"))
 257             {
 258                 auto found = std::find_if(restp.begin(), restp.end(), [rname](const auto& entry) {
 259                     return strcmp(entry.rname, rname) == 0;
 260                 });
 261                 if (found == restp.end())
 262                 {
 263                     restp.emplace_back();
 264                     auto& last = restp.back();
 265                     last.rname = gmx_strdup(rname);
 266                     last.bNeg  = false;
 267                     last.gname = gmx_strdup(rname);
 268                 }
 269             }
 270         }
 271         for (int i = 0; (i < gmx::ssize(restp)); i++)
 272         {
 273             std::vector<int> aid;
 274             for (int j = 0; (j < atoms->nr); j++)
 275             {
 276                 const char* rname = *atoms->resinfo[atoms->atom[j].resind].name;
 277                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg)
 278                     || (strcmp(restp[i].rname, rname) != 0 && restp[i].bNeg))
 279                 {
 280                     aid.push_back(j);
 281                 }
 282             }
 283             add_grp(gb, gn, aid, restp[i].gname);
 284             if (bASK)
 285             {
 286                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 287                 fflush(stdout);
 288                 if (gmx_ask_yesno(bASK))
 289                 {
 290                     std::vector<const char*> attp;
 291                     for (size_t k = 0; (k < aid.size()); k++)
 292                     {
 293                         const char* aname = *atoms->atomname[aid[k]];
 294                         auto found = std::find_if(attp.begin(), attp.end(), [aname](const char* entry) {
 295                             return strcmp(aname, entry) == 0;
 296                         });
 297                         if (found == attp.end())
 298                         {
 299                             attp.emplace_back(aname);
 300                         }
 301                     }
 302                     if (attp.size() > 1)
 303                     {
 304                         const int natp = attp.size();
 305                         for (int l = 0; (l < natp); l++)
 306                         {
 307                             std::vector<int> aaid;
 308                             for (size_t k = 0; (k < aid.size()); k++)
 309                             {
 310                                 const char* aname = *atoms->atomname[aid[k]];
 311                                 if (strcmp(aname, attp[l]) == 0)
 312                                 {
 313                                     aaid.push_back(aid[k]);
 314                                 }
 315                             }
 316                             add_grp(gb, gn, aaid, attp[l]);
 317                         }
 318                     }
 319                 }
 320             }
 321             sfree(restp[i].rname);
 322             sfree(restp[i].gname);
 323         }
 324     }
 325 }
 326
 327 /*! \brief
 328  * Data necessary to construct a single (protein) index group in
 329  * analyse_prot().
 330  */
 331 typedef struct gmx_help_make_index_group // NOLINT(clang-analyzer-optin.performance.Padding)
 332 {
 333     /** The set of atom names that will be used to form this index group */
 334     const char** defining_atomnames;
 335     /** Size of the defining_atomnames array */
 336     int num_defining_atomnames;
 337     /** Name of this index group */
 338     const char* group_name;
 339     /** Whether the above atom names name the atoms in the group, or
 340         those not in the group */
 341     gmx_bool bTakeComplement;
 342     /** The index in wholename gives the first item in the arrays of
 343        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 344        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 345        This is comparable to using a '*' wildcard at the end of specific
 346        atom names, but that is more involved to implement...
 347      */
 348     int wholename;
 349     /** Only create this index group if it differs from the one specified in compareto,
 350        where -1 means to always create this group. */
 351     int compareto;
 352 } t_gmx_help_make_index_group;
 353
 354 static void analyse_prot(gmx::ArrayRef<const std::string> restype,
 355                          const t_atoms*                   atoms,
 356                          t_blocka*                        gb,
 357                          char***                          gn,
 358                          gmx_bool                         bASK,
 359                          gmx_bool                         bVerb)
 360 {
 361     /* lists of atomnames to be used in constructing index groups: */
 362     static const char* pnoh[]   = { "H", "HN" };
 363     static const char* pnodum[] = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 364                                     "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 365     static const char* calpha[] = { "CA" };
 366     static const char* bb[]     = { "N", "CA", "C" };
 367     static const char* mc[]     = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 368     static const char* mcb[] = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 369     static const char* mch[] = { "N",  "CA",  "C",  "O",  "O1", "O2", "OC1", "OC2",
 370                                  "OT", "OXT", "H1", "H2", "H3", "H",  "HN" };
 371
 372     static const t_gmx_help_make_index_group constructing_data[] = {
 373         { nullptr, 0, "Protein", TRUE, -1, -1 },
 374         { pnoh, asize(pnoh), "Protein-H", TRUE, 0, -1 },
 375         { calpha, asize(calpha), "C-alpha", FALSE, -1, -1 },
 376         { bb, asize(bb), "Backbone", FALSE, -1, -1 },
 377         { mc, asize(mc), "MainChain", FALSE, -1, -1 },
 378         { mcb, asize(mcb), "MainChain+Cb", FALSE, -1, -1 },
 379         { mch, asize(mch), "MainChain+H", FALSE, -1, -1 },
 380         { mch, asize(mch), "SideChain", TRUE, -1, -1 },
 381         { mch, asize(mch), "SideChain-H", TRUE, 11, -1 },
 382         { pnodum, asize(pnodum), "Prot-Masses", TRUE, -1, 0 },
 383     };
 384     const int num_index_groups = asize(constructing_data);
 385
 386     char ndx_name[STRLEN];
 387
 388     if (bVerb)
 389     {
 390         printf("Analysing Protein...\n");
 391     }
 392     std::vector<int> aid;
 393
 394     /* calculate the number of protein residues */
 395     int npres = 0;
 396     for (int i = 0; (i < atoms->nres); i++)
 397     {
 398         if (0 == gmx_strcasecmp(restype[i].c_str(), "Protein"))
 399         {
 400             npres++;
 401         }
 402     }
 403     /* find matching or complement atoms */
 404     for (int i = 0; (i < num_index_groups); i++)
 405     {
 406         for (int n = 0; (n < atoms->nr); n++)
 407         {
 408             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind].c_str(), "Protein"))
 409             {
 410                 bool match = false;
 411                 for (int j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 412                 {
 413                     /* skip digits at beginning of atomname, e.g. 1H */
 414                     char* atnm = *atoms->atomname[n];
 415                     while (isdigit(atnm[0]))
 416                     {
 417                         atnm++;
 418                     }
 419                     if ((constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename))
 420                     {
 421                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 422                         {
 423                             match = true;
 424                         }
 425                     }
 426                     else
 427                     {
 428                         if (0
 429                             == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],
 430                                                atnm,
 431                                                strlen(constructing_data[i].defining_atomnames[j])))
 432                         {
 433                             match = true;
 434                         }
 435                     }
 436                 }
 437                 if (constructing_data[i].bTakeComplement != match)
 438                 {
 439                     aid.push_back(n);
 440                 }
 441             }
 442         }
 443         /* if we want to add this group always or it differs from previous
 444            group, add it: */
 445         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, aid, constructing_data[i].compareto - i))
 446         {
 447             add_grp(gb, gn, aid, constructing_data[i].group_name);
 448         }
 449         aid.clear();
 450     }
 451
 452     if (bASK)
 453     {
 454         for (int i = 0; (i < num_index_groups); i++)
 455         {
 456             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 457             if (gmx_ask_yesno(bASK))
 458             {
 459                 aid.clear();
 460                 for (int n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
 461                 {
 462                     int resind = atoms->atom[n].resind;
 463                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 464                     {
 465                         bool match = false;
 466                         for (int j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 467                         {
 468                             if (0
 469                                 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],
 470                                                   *atoms->atomname[n]))
 471                             {
 472                                 match = true;
 473                             }
 474                         }
 475                         if (constructing_data[i].bTakeComplement != match)
 476                         {
 477                             aid.push_back(n);
 478                         }
 479                     }
 480                     /* copy the residuename to the tail of the groupname */
 481                     if (!aid.empty())
 482                     {
 483                         t_resinfo* ri = &atoms->resinfo[resind];
 484                         sprintf(ndx_name,
 485                                 "%s_%s%d%c",
 486                                 constructing_data[i].group_name,
 487                                 *ri->name,
 488                                 ri->nr,
 489                                 ri->ic == ' ' ? '\0' : ri->ic);
 490                         add_grp(gb, gn, aid, ndx_name);
 491                         aid.clear();
 492                     }
 493                 }
 494             }
 495         }
 496         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 497         if (gmx_ask_yesno(bASK))
 498         {
 499             /* Make swap sidechain C=O index */
 500             aid.clear();
 501             for (int n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
 502             {
 503                 int resind = atoms->atom[n].resind;
 504                 int hold   = -1;
 505                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 506                 {
 507                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 508                     {
 509                         aid.push_back(n);
 510                         hold = aid.size();
 511                         aid.resize(aid.size() + 3);
 512                     }
 513                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 514                     {
 515                         if (hold == -1)
 516                         {
 517                             gmx_incons("Atom naming problem");
 518                         }
 519                         aid[hold] = n;
 520                     }
 521                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 522                     {
 523                         if (hold == -1)
 524                         {
 525                             gmx_incons("Atom naming problem");
 526                         }
 527                         aid[hold + 1] = n;
 528                     }
 529                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 530                     {
 531                         if (hold == -1)
 532                         {
 533                             gmx_incons("Atom naming problem");
 534                         }
 535                         aid[hold + 1] = n;
 536                     }
 537                     else
 538                     {
 539                         aid.push_back(n);
 540                     }
 541                 }
 542             }
 543             /* copy the residuename to the tail of the groupname */
 544             if (!aid.empty())
 545             {
 546                 add_grp(gb, gn, aid, "SwapSC-CO");
 547             }
 548         }
 549     }
 550 }
 551
 552
 553 void analyse(const t_atoms* atoms, t_blocka* gb, char*** gn, gmx_bool bASK, gmx_bool bVerb)
 554 {
 555     if (bVerb)
 556     {
 557         printf("Analysing residue names:\n");
 558     }
 559     /* Create system group, every single atom */
 560     std::vector<int> aid(atoms->nr);
 561     std::iota(aid.begin(), aid.end(), 0);
 562     add_grp(gb, gn, aid, "System");
 563
 564     /* For every residue, get a pointer to the residue type name */
 565     ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat");
 566
 567     std::vector<std::string> restype;
 568     std::vector<std::string> previousTypename;
 569     if (atoms->nres > 0)
 570     {
 571         const char* resnm = *atoms->resinfo[0].name;
 572         restype.emplace_back(typeOfNamedDatabaseResidue(residueTypeMap, resnm));
 573         previousTypename.push_back(restype.back());
 574
 575         for (int i = 1; i < atoms->nres; i++)
 576         {
 577             const char* resnm = *atoms->resinfo[i].name;
 578             restype.emplace_back(typeOfNamedDatabaseResidue(residueTypeMap, resnm));
 579
 580             /* Note that this does not lead to a N*N loop, but N*K, where
 581              * K is the number of residue _types_, which is small and independent of N.
 582              */
 583             bool found = false;
 584             for (size_t k = 0; k < previousTypename.size() && !found; k++)
 585             {
 586                 found = strcmp(restype.back().c_str(), previousTypename[k].c_str()) == 0;
 587             }
 588             if (!found)
 589             {
 590                 previousTypename.push_back(restype.back());
 591             }
 592         }
 593     }
 594
 595     if (bVerb)
 596     {
 597         p_status(restype, previousTypename);
 598     }
 599
 600     for (gmx::index k = 0; k < gmx::ssize(previousTypename); k++)
 601     {
 602         std::vector<int> aid = mk_aid(atoms, restype, previousTypename[k], TRUE);
 603
 604         /* Check for special types to do fancy stuff with */
 605
 606         if (!gmx_strcasecmp(previousTypename[k].c_str(), "Protein") && !aid.empty())
 607         {
 608             /* PROTEIN */
 609             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 610
 611             /* Create a Non-Protein group */
 612             std::vector<int> aid = mk_aid(atoms, restype, "Protein", FALSE);
 613             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 614             {
 615                 add_grp(gb, gn, aid, "non-Protein");
 616             }
 617         }
 618         else if (!gmx_strcasecmp(previousTypename[k].c_str(), "Water") && !aid.empty())
 619         {
 620             add_grp(gb, gn, aid, previousTypename[k]);
 621             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 622             add_grp(gb, gn, aid, "SOL");
 623
 624
 625             /* Solvent, create a negated group too */
 626             std::vector<int> aid = mk_aid(atoms, restype, "Water", FALSE);
 627             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 628             {
 629                 add_grp(gb, gn, aid, "non-Water");
 630             }
 631         }
 632         else if (!aid.empty())
 633         {
 634             /* Other groups */
 635             add_grp(gb, gn, aid, previousTypename[k]);
 636             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 637         }
 638     }
 639
 640
 641     /* Create a merged water_and_ions group */
 642     int iwater = -1;
 643     int iion   = -1;
 644     int nwater = 0;
 645     int nion   = 0;
 646
 647     for (int i = 0; i < gb->nr; i++)
 648     {
 649         if (!gmx_strcasecmp((*gn)[i], "Water"))
 650         {
 651             iwater = i;
 652             nwater = gb->index[i + 1] - gb->index[i];
 653         }
 654         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 655         {
 656             iion = i;
 657             nion = gb->index[i + 1] - gb->index[i];
 658         }
 659     }
 660
 661     if (nwater > 0 && nion > 0)
 662     {
 663         srenew(gb->index, gb->nr + 2);
 664         srenew(*gn, gb->nr + 1);
 665         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 666         srenew(gb->a, gb->nra + nwater + nion);
 667         if (nwater > 0)
 668         {
 669             for (int i = gb->index[iwater]; i < gb->index[iwater + 1]; i++)
 670             {
 671                 gb->a[gb->nra++] = gb->a[i];
 672             }
 673         }
 674         if (nion > 0)
 675         {
 676             for (int i = gb->index[iion]; i < gb->index[iion + 1]; i++)
 677             {
 678                 gb->a[gb->nra++] = gb->a[i];
 679             }
 680         }
 681         gb->nr++;
 682         gb->index[gb->nr] = gb->nra;
 683     }
 684 }
 685
 686
 687 void check_index(const char* gname, int n, int index[], const char* traj, int natoms)
 688 {
 689     for (int i = 0; i < n; i++)
 690     {
 691         if (index[i] >= natoms)
 692         {
 693             gmx_fatal(FARGS,
 694                       "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 695                       gname ? gname : "Index",
 696                       i + 1,
 697                       index[i] + 1,
 698                       traj ? traj : "the trajectory",
 699                       natoms);
 700         }
 701         else if (index[i] < 0)
 702         {
 703             gmx_fatal(FARGS,
 704                       "%s atom number (index[%d]=%d) is less than zero",
 705                       gname ? gname : "Index",
 706                       i + 1,
 707                       index[i] + 1);
 708         }
 709     }
 710 }
 711
 712 t_blocka* init_index(const char* gfile, char*** grpname)
 713 {
 714     t_blocka* b = nullptr;
 715     char      line[STRLEN], str[STRLEN];
 716
 717     FILE* in = gmx_ffopen(gfile, "r");
 718     snew(b, 1);
 719     b->nr          = 0;
 720     b->index       = nullptr;
 721     b->nra         = 0;
 722     b->a           = nullptr;
 723     *grpname       = nullptr;
 724     int maxentries = 0;
 725     while (get_a_line(in, line, STRLEN))
 726     {
 727         if (get_header(line, str))
 728         {
 729             b->nr++;
 730             srenew(b->index, b->nr + 1);
 731             srenew(*grpname, b->nr);
 732             if (b->nr == 1)
 733             {
 734                 b->index[0] = 0;
 735             }
 736             b->index[b->nr]       = b->index[b->nr - 1];
 737             (*grpname)[b->nr - 1] = gmx_strdup(str);
 738         }
 739         else
 740         {
 741             if (b->nr == 0)
 742             {
 743                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 744             }
 745             char* pt = line;
 746             while (sscanf(pt, "%s", str) == 1)
 747             {
 748                 int i = b->index[b->nr];
 749                 if (i >= maxentries)
 750                 {
 751                     maxentries += 1024;
 752                     srenew(b->a, maxentries);
 753                 }
 754                 assert(b->a != nullptr); // for clang analyzer
 755                 b->a[i] = strtol(str, nullptr, 10) - 1;
 756                 b->index[b->nr]++;
 757                 (b->nra)++;
 758                 pt = strstr(pt, str) + strlen(str);
 759             }
 760         }
 761     }
 762     gmx_ffclose(in);
 763
 764     for (int i = 0; (i < b->nr); i++)
 765     {
 766         assert(b->a != nullptr); // for clang analyzer
 767         for (int j = b->index[i]; (j < b->index[i + 1]); j++)
 768         {
 769             if (b->a[j] < 0)
 770             {
 771                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n", b->a[j], (*grpname)[i]);
 772             }
 773         }
 774     }
 775
 776     return b;
 777 }
 778
 779 static void minstring(char* str)
 780 {
 781     for (int i = 0; (i < static_cast<int>(strlen(str))); i++)
 782     {
 783         if (str[i] == '-')
 784         {
 785             str[i] = '_';
 786         }
 787     }
 788 }
 789
 790 int find_group(const char* s, int ngrps, char** grpname)
 791 {
 792     char      string[STRLEN];
 793     bool      bMultiple = false;
 794     const int n         = strlen(s);
 795     int       aa        = -1;
 796     /* first look for whole name match */
 797     {
 798         for (int i = 0; i < ngrps; i++)
 799         {
 800             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 801             {
 802                 if (aa != -1)
 803                 {
 804                     bMultiple = true;
 805                 }
 806                 aa = i;
 807             }
 808         }
 809     }
 810     /* second look for first string match */
 811     if (aa == -1)
 812     {
 813         for (int i = 0; i < ngrps; i++)
 814         {
 815             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 816             {
 817                 if (aa != -1)
 818                 {
 819                     bMultiple = TRUE;
 820                 }
 821                 aa = i;
 822             }
 823         }
 824     }
 825     /* last look for arbitrary substring match */
 826     if (aa == -1)
 827     {
 828         char key[STRLEN];
 829         strncpy(key, s, sizeof(key) - 1);
 830         key[STRLEN - 1] = '\0';
 831         upstring(key);
 832         minstring(key);
 833         for (int i = 0; i < ngrps; i++)
 834         {
 835             strncpy(string, grpname[i], STRLEN - 1);
 836             upstring(string);
 837             minstring(string);
 838             if (strstr(string, key) != nullptr)
 839             {
 840                 if (aa != -1)
 841                 {
 842                     bMultiple = TRUE;
 843                 }
 844                 aa = i;
 845             }
 846         }
 847     }
 848     if (bMultiple)
 849     {
 850         printf("Error: Multiple groups '%s' selected\n", s);
 851         aa = -1;
 852     }
 853     return aa;
 854 }
 855
 856 static int qgroup(int* a, int ngrps, char** grpname)
 857 {
 858     char  s[STRLEN];
 859     int   aa       = 0;
 860     bool  bInRange = false;
 861     char* end      = nullptr;
 862
 863     do
 864     {
 865         fprintf(stderr, "Select a group: ");
 866         do
 867         {
 868             if (scanf("%s", s) != 1)
 869             {
 870                 gmx_fatal(FARGS, "Cannot read from input");
 871             }
 872             trim(s); /* remove spaces */
 873         } while (strlen(s) == 0);
 874         aa = strtol(s, &end, 10);
 875         if (aa == 0 && end[0] != '\0') /* string entered */
 876         {
 877             aa = find_group(s, ngrps, grpname);
 878         }
 879         bInRange = (aa >= 0 && aa < ngrps);
 880         if (!bInRange)
 881         {
 882             printf("Error: No such group '%s'\n", s);
 883         }
 884     } while (!bInRange);
 885     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 886     *a = aa;
 887     return aa;
 888 }
 889
 890 static void
 891 rd_groups(t_blocka* grps, char** grpname, char* gnames[], int ngrps, int isize[], int* index[], int grpnr[])
 892 {
 893     if (grps->nr == 0)
 894     {
 895         gmx_fatal(FARGS, "Error: no groups in indexfile");
 896     }
 897     for (int i = 0; (i < grps->nr); i++)
 898     {
 899         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i], grps->index[i + 1] - grps->index[i]);
 900     }
 901     for (int i = 0; (i < ngrps); i++)
 902     {
 903         int gnr1 = 0;
 904         if (grps->nr > 1)
 905         {
 906             do
 907             {
 908                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 909                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 910                 {
 911                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr - 1);
 912                 }
 913             } while ((gnr1 < 0) || (gnr1 >= grps->nr));
 914         }
 915         else
 916         {
 917             fprintf(stderr, "There is one group in the index\n");
 918             gnr1 = 0;
 919         }
 920         gnames[i] = gmx_strdup(grpname[gnr1]);
 921         isize[i]  = grps->index[gnr1 + 1] - grps->index[gnr1];
 922         snew(index[i], isize[i]);
 923         for (int j = 0; (j < isize[i]); j++)
 924         {
 925             index[i][j] = grps->a[grps->index[gnr1] + j];
 926         }
 927     }
 928 }
 929
 930 void rd_index(const char* statfile, int ngrps, int isize[], int* index[], char* grpnames[])
 931 {
 932     char** gnames = nullptr;
 933     int*   grpnr  = nullptr;
 934
 935     snew(grpnr, ngrps);
 936     if (!statfile)
 937     {
 938         gmx_fatal(FARGS, "No index file specified");
 939     }
 940     t_blocka* grps = init_index(statfile, &gnames);
 941     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
 942     for (int i = 0; i < grps->nr; i++)
 943     {
 944         sfree(gnames[i]);
 945     }
 946     sfree(gnames);
 947     sfree(grpnr);
 948     done_blocka(grps);
 949     sfree(grps);
 950 }
 951
 952 void get_index(const t_atoms* atoms, const char* fnm, int ngrps, int isize[], int* index[], char* grpnames[])
 953 {
 954     char***   gnames = nullptr;
 955     t_blocka* grps   = nullptr;
 956     int*      grpnr  = nullptr;
 957
 958     snew(grpnr, ngrps);
 959     snew(gnames, 1);
 960     if (fnm != nullptr)
 961     {
 962         grps = init_index(fnm, gnames);
 963     }
 964     else if (atoms)
 965     {
 966         snew(grps, 1);
 967         snew(grps->index, 1);
 968         analyse(atoms, grps, gnames, FALSE, FALSE);
 969     }
 970     else
 971     {
 972         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
 973     }
 974
 975     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
 976     for (int i = 0; i < grps->nr; ++i)
 977     {
 978         sfree((*gnames)[i]);
 979     }
 980     sfree(*gnames);
 981     sfree(gnames);
 982     sfree(grpnr);
 983     done_blocka(grps);
 984     sfree(grps);
 985 }
 986
 987 t_cluster_ndx* cluster_index(FILE* fplog, const char* ndx)
 988 {
 989     t_cluster_ndx* c = nullptr;
 990
 991     snew(c, 1);
 992     c->clust    = init_index(ndx, &c->grpname);
 993     c->maxframe = -1;
 994     for (int i = 0; (i < c->clust->nra); i++)
 995     {
 996         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
 997     }
 998     fprintf(fplog ? fplog : stdout,
 999             "There are %d clusters containing %d structures, highest framenr is %d\n",
1000             c->clust->nr,
1001             c->clust->nra,
1002             c->maxframe);
1003     if (debug)
1004     {
1005         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1006         for (int i = 0; (i < c->clust->nra); i++)
1007         {
1008             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1009             {
1010                 gmx_fatal(FARGS,
1011                           "Range check error for c->clust->a[%d] = %d\n"
1012                           "should be within 0 and %d",
1013                           i,
1014                           c->clust->a[i],
1015                           c->maxframe + 1);
1016             }
1017         }
1018     }
1019     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1020
1021     return c;
1022 }