src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "index.h"
  41
  42 #include <cassert>
  43 #include <cctype>
  44 #include <cstdlib>
  45 #include <cstring>
  46
  47 #include <algorithm>
  48
  49 #include "gromacs/topology/atoms.h"
  50 #include "gromacs/topology/block.h"
  51 #include "gromacs/topology/invblock.h"
  52 #include "gromacs/topology/residuetypes.h"
  53 #include "gromacs/utility/arraysize.h"
  54 #include "gromacs/utility/cstringutil.h"
  55 #include "gromacs/utility/fatalerror.h"
  56 #include "gromacs/utility/futil.h"
  57 #include "gromacs/utility/smalloc.h"
  58 #include "gromacs/utility/strdb.h"
  59
  60 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  61 {
  62     char c;
  63
  64     if (bASK)
  65     {
  66         do
  67         {
  68             c = toupper(fgetc(stdin));
  69         } while ((c != 'Y') && (c != 'N'));
  70
  71         return (c == 'Y');
  72     }
  73     else
  74     {
  75         return FALSE;
  76     }
  77 }
  78
  79 void write_index(const char* outf, t_blocka* b, char** gnames, gmx_bool bDuplicate, int natoms)
  80 {
  81     FILE* out;
  82     int   i, j, k;
  83
  84     out = gmx_ffopen(outf, "w");
  85     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  86     for (i = 0; (i < b->nr); i++)
  87     {
  88         fprintf(out, "[ %s ]", gnames[i]);
  89         for (k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
  90         {
  91             const char sep = (k % 15 == 0 ? '\n' : ' ');
  92             fprintf(out, "%c%4d", sep, b->a[j] + 1);
  93         }
  94         fprintf(out, "\n");
  95     }
  96
  97     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
  98     if (bDuplicate)
  99     {
 100         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 101         for (i = 0; (i < b->nr); i++)
 102         {
 103             fprintf(out, "[ %s_copy ]", gnames[i]);
 104             for (k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
 105             {
 106                 const char sep = (k % 15 == 0 ? '\n' : ' ');
 107                 fprintf(out, "%c%4d", sep, b->a[j] + 1 + natoms);
 108             }
 109             fprintf(out, "\n");
 110         }
 111     }
 112
 113     gmx_ffclose(out);
 114 }
 115
 116 void add_grp(t_blocka* b, char*** gnames, gmx::ArrayRef<const int> a, const std::string& name)
 117 {
 118     srenew(b->index, b->nr + 2);
 119     srenew(*gnames, b->nr + 1);
 120     (*gnames)[b->nr] = gmx_strdup(name.c_str());
 121
 122     srenew(b->a, b->nra + a.size());
 123     for (gmx::index i = 0; (i < a.ssize()); i++)
 124     {
 125         b->a[b->nra++] = a[i];
 126     }
 127     b->nr++;
 128     b->index[b->nr] = b->nra;
 129 }
 130
 131 /*! \brief
 132  * Compare index in groups.
 133  *
 134  * Checks the index in \p a to the one in \p b at \p index.
 135  * If \p index is < 0, it is taken as relative to the end of \p b.
 136  *
 137  * \param[in] b Block with groups.
 138  * \param[in] a New group to compare to.
 139  * \param[in] index The index to check.
 140  * \returns True if groups are the same.
 141  */
 142 static bool grp_cmp(t_blocka* b, gmx::ArrayRef<const int> a, int index)
 143 {
 144     if (index < 0)
 145     {
 146         index = b->nr - 1 + index;
 147     }
 148     if (index >= b->nr)
 149     {
 150         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 151     }
 152     /* compare sizes */
 153     if (a.ssize() != b->index[index + 1] - b->index[index])
 154     {
 155         return FALSE;
 156     }
 157     for (gmx::index i = 0; i < a.ssize(); i++)
 158     {
 159         if (a[i] != b->a[b->index[index] + i])
 160         {
 161             return false;
 162         }
 163     }
 164     return true;
 165 }
 166 //! Print out how many residues of a certain type are present.
 167 static void p_status(gmx::ArrayRef<const std::string> restype, gmx::ArrayRef<const std::string> typenames)
 168 {
 169     std::vector<int> counter(typenames.size(), 0);
 170     std::transform(typenames.begin(), typenames.end(), counter.begin(), [&restype](const std::string& typenm) {
 171         return std::count_if(restype.begin(), restype.end(), [&typenm](const std::string& res) {
 172             return gmx_strcasecmp(res.c_str(), typenm.c_str()) == 0;
 173         });
 174     });
 175
 176     for (int i = 0; (i < typenames.ssize()); i++)
 177     {
 178         if (counter[i] > 0)
 179         {
 180             printf("There are: %5d %10s residues\n", counter[i], typenames[i].c_str());
 181         }
 182     }
 183 }
 184
 185 /*! \brief
 186  * Return a new group of atoms with \p restype that match \p typestring,
 187  * or not matching if \p bMatch.
 188  *
 189  * \param[in] atoms Atoms data to use.
 190  * \param[in] restype Residuetypes to match.
 191  * \param[in] typestring Which type the residue should have.
 192  * \param[in] bMatch whether to return matching atoms or those that don't.
 193  * \returns Vector of atoms that match.
 194  */
 195 static std::vector<int> mk_aid(const t_atoms*                   atoms,
 196                                gmx::ArrayRef<const std::string> restype,
 197                                const std::string&               typestring,
 198                                bool                             bMatch)
 199 {
 200     std::vector<int> a;
 201     for (int i = 0; (i < atoms->nr); i++)
 202     {
 203         bool res = gmx_strcasecmp(restype[atoms->atom[i].resind].c_str(), typestring.c_str()) == 0;
 204         if (!bMatch)
 205         {
 206             res = !res;
 207         }
 208         if (res)
 209         {
 210             a.push_back(i);
 211         }
 212     }
 213
 214     return a;
 215 }
 216
 217 typedef struct
 218 {
 219     char*    rname;
 220     gmx_bool bNeg;
 221     char*    gname;
 222 } restp_t;
 223
 224 static void analyse_other(gmx::ArrayRef<std::string> restype,
 225                           const t_atoms*             atoms,
 226                           t_blocka*                  gb,
 227                           char***                    gn,
 228                           gmx_bool                   bASK,
 229                           gmx_bool                   bVerb)
 230 {
 231     restp_t* restp = nullptr;
 232     char**   attp  = nullptr;
 233     char *   rname, *aname;
 234     int      i, resind, natp, nrestp = 0;
 235
 236     for (i = 0; (i < atoms->nres); i++)
 237     {
 238         if (gmx_strcasecmp(restype[i].c_str(), "Protein")
 239             && gmx_strcasecmp(restype[i].c_str(), "DNA") && gmx_strcasecmp(restype[i].c_str(), "RNA")
 240             && gmx_strcasecmp(restype[i].c_str(), "Water"))
 241         {
 242             break;
 243         }
 244     }
 245     if (i < atoms->nres)
 246     {
 247         /* we have others */
 248         if (bVerb)
 249         {
 250             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into "
 251                    "groups...\n");
 252         }
 253         for (int k = 0; (k < atoms->nr); k++)
 254         {
 255             resind = atoms->atom[k].resind;
 256             rname  = *atoms->resinfo[resind].name;
 257             if (gmx_strcasecmp(restype[resind].c_str(), "Protein")
 258                 && gmx_strcasecmp(restype[resind].c_str(), "DNA")
 259                 && gmx_strcasecmp(restype[resind].c_str(), "RNA")
 260                 && gmx_strcasecmp(restype[resind].c_str(), "Water"))
 261             {
 262                 int l;
 263                 for (l = 0; (l < nrestp); l++)
 264                 {
 265                     assert(restp);
 266                     if (strcmp(restp[l].rname, rname) == 0)
 267                     {
 268                         break;
 269                     }
 270                 }
 271                 if (l == nrestp)
 272                 {
 273                     srenew(restp, nrestp + 1);
 274                     restp[nrestp].rname = gmx_strdup(rname);
 275                     restp[nrestp].bNeg  = FALSE;
 276                     restp[nrestp].gname = gmx_strdup(rname);
 277                     nrestp++;
 278                 }
 279             }
 280         }
 281         for (int i = 0; (i < nrestp); i++)
 282         {
 283             std::vector<int> aid;
 284             for (int j = 0; (j < atoms->nr); j++)
 285             {
 286                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 287                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg)
 288                     || (strcmp(restp[i].rname, rname) != 0 && restp[i].bNeg))
 289                 {
 290                     aid.push_back(j);
 291                 }
 292             }
 293             add_grp(gb, gn, aid, restp[i].gname);
 294             if (bASK)
 295             {
 296                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 297                 fflush(stdout);
 298                 if (gmx_ask_yesno(bASK))
 299                 {
 300                     natp = 0;
 301                     for (size_t k = 0; (k < aid.size()); k++)
 302                     {
 303                         aname = *atoms->atomname[aid[k]];
 304                         int l;
 305                         for (l = 0; (l < natp); l++)
 306                         {
 307                             if (strcmp(aname, attp[l]) == 0)
 308                             {
 309                                 break;
 310                             }
 311                         }
 312                         if (l == natp)
 313                         {
 314                             srenew(attp, ++natp);
 315                             attp[natp - 1] = aname;
 316                         }
 317                     }
 318                     if (natp > 1)
 319                     {
 320                         for (int l = 0; (l < natp); l++)
 321                         {
 322                             std::vector<int> aaid;
 323                             for (size_t k = 0; (k < aid.size()); k++)
 324                             {
 325                                 aname = *atoms->atomname[aid[k]];
 326                                 if (strcmp(aname, attp[l]) == 0)
 327                                 {
 328                                     aaid.push_back(aid[k]);
 329                                 }
 330                             }
 331                             add_grp(gb, gn, aaid, attp[l]);
 332                         }
 333                     }
 334                     sfree(attp);
 335                     attp = nullptr;
 336                 }
 337             }
 338             sfree(restp[i].rname);
 339             sfree(restp[i].gname);
 340         }
 341         sfree(restp);
 342     }
 343 }
 344
 345 /*! \brief
 346  * Data necessary to construct a single (protein) index group in
 347  * analyse_prot().
 348  */
 349 typedef struct gmx_help_make_index_group // NOLINT(clang-analyzer-optin.performance.Padding)
 350 {
 351     /** The set of atom names that will be used to form this index group */
 352     const char** defining_atomnames;
 353     /** Size of the defining_atomnames array */
 354     int num_defining_atomnames;
 355     /** Name of this index group */
 356     const char* group_name;
 357     /** Whether the above atom names name the atoms in the group, or
 358         those not in the group */
 359     gmx_bool bTakeComplement;
 360     /** The index in wholename gives the first item in the arrays of
 361        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 362        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 363        This is comparable to using a '*' wildcard at the end of specific
 364        atom names, but that is more involved to implement...
 365      */
 366     int wholename;
 367     /** Only create this index group if it differs from the one specified in compareto,
 368        where -1 means to always create this group. */
 369     int compareto;
 370 } t_gmx_help_make_index_group;
 371
 372 static void analyse_prot(gmx::ArrayRef<const std::string> restype,
 373                          const t_atoms*                   atoms,
 374                          t_blocka*                        gb,
 375                          char***                          gn,
 376                          gmx_bool                         bASK,
 377                          gmx_bool                         bVerb)
 378 {
 379     /* lists of atomnames to be used in constructing index groups: */
 380     static const char* pnoh[]   = { "H", "HN" };
 381     static const char* pnodum[] = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 382                                     "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 383     static const char* calpha[] = { "CA" };
 384     static const char* bb[]     = { "N", "CA", "C" };
 385     static const char* mc[]     = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 386     static const char* mcb[] = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 387     static const char* mch[] = { "N",  "CA",  "C",  "O",  "O1", "O2", "OC1", "OC2",
 388                                  "OT", "OXT", "H1", "H2", "H3", "H",  "HN" };
 389
 390     static const t_gmx_help_make_index_group constructing_data[] = {
 391         { nullptr, 0, "Protein", TRUE, -1, -1 },
 392         { pnoh, asize(pnoh), "Protein-H", TRUE, 0, -1 },
 393         { calpha, asize(calpha), "C-alpha", FALSE, -1, -1 },
 394         { bb, asize(bb), "Backbone", FALSE, -1, -1 },
 395         { mc, asize(mc), "MainChain", FALSE, -1, -1 },
 396         { mcb, asize(mcb), "MainChain+Cb", FALSE, -1, -1 },
 397         { mch, asize(mch), "MainChain+H", FALSE, -1, -1 },
 398         { mch, asize(mch), "SideChain", TRUE, -1, -1 },
 399         { mch, asize(mch), "SideChain-H", TRUE, 11, -1 },
 400         { pnodum, asize(pnodum), "Prot-Masses", TRUE, -1, 0 },
 401     };
 402     const int num_index_groups = asize(constructing_data);
 403
 404     int      n, j;
 405     int      npres;
 406     gmx_bool match;
 407     char     ndx_name[STRLEN], *atnm;
 408     int      i;
 409
 410     if (bVerb)
 411     {
 412         printf("Analysing Protein...\n");
 413     }
 414     std::vector<int> aid;
 415
 416     /* calculate the number of protein residues */
 417     npres = 0;
 418     for (i = 0; (i < atoms->nres); i++)
 419     {
 420         if (0 == gmx_strcasecmp(restype[i].c_str(), "Protein"))
 421         {
 422             npres++;
 423         }
 424     }
 425     /* find matching or complement atoms */
 426     for (i = 0; (i < num_index_groups); i++)
 427     {
 428         for (n = 0; (n < atoms->nr); n++)
 429         {
 430             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind].c_str(), "Protein"))
 431             {
 432                 match = FALSE;
 433                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 434                 {
 435                     /* skip digits at beginning of atomname, e.g. 1H */
 436                     atnm = *atoms->atomname[n];
 437                     while (isdigit(atnm[0]))
 438                     {
 439                         atnm++;
 440                     }
 441                     if ((constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename))
 442                     {
 443                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 444                         {
 445                             match = TRUE;
 446                         }
 447                     }
 448                     else
 449                     {
 450                         if (0
 451                             == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],
 452                                                atnm,
 453                                                strlen(constructing_data[i].defining_atomnames[j])))
 454                         {
 455                             match = TRUE;
 456                         }
 457                     }
 458                 }
 459                 if (constructing_data[i].bTakeComplement != match)
 460                 {
 461                     aid.push_back(n);
 462                 }
 463             }
 464         }
 465         /* if we want to add this group always or it differs from previous
 466            group, add it: */
 467         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, aid, constructing_data[i].compareto - i))
 468         {
 469             add_grp(gb, gn, aid, constructing_data[i].group_name);
 470         }
 471         aid.clear();
 472     }
 473
 474     if (bASK)
 475     {
 476         for (i = 0; (i < num_index_groups); i++)
 477         {
 478             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 479             if (gmx_ask_yesno(bASK))
 480             {
 481                 int resind;
 482                 aid.clear();
 483                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
 484                 {
 485                     resind = atoms->atom[n].resind;
 486                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 487                     {
 488                         match = FALSE;
 489                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 490                         {
 491                             if (0
 492                                 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],
 493                                                   *atoms->atomname[n]))
 494                             {
 495                                 match = TRUE;
 496                             }
 497                         }
 498                         if (constructing_data[i].bTakeComplement != match)
 499                         {
 500                             aid.push_back(n);
 501                         }
 502                     }
 503                     /* copy the residuename to the tail of the groupname */
 504                     if (!aid.empty())
 505                     {
 506                         t_resinfo* ri;
 507                         ri = &atoms->resinfo[resind];
 508                         sprintf(ndx_name,
 509                                 "%s_%s%d%c",
 510                                 constructing_data[i].group_name,
 511                                 *ri->name,
 512                                 ri->nr,
 513                                 ri->ic == ' ' ? '\0' : ri->ic);
 514                         add_grp(gb, gn, aid, ndx_name);
 515                         aid.clear();
 516                     }
 517                 }
 518             }
 519         }
 520         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 521         if (gmx_ask_yesno(bASK))
 522         {
 523             /* Make swap sidechain C=O index */
 524             aid.clear();
 525             for (int n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
 526             {
 527                 int resind = atoms->atom[n].resind;
 528                 int hold   = -1;
 529                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 530                 {
 531                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 532                     {
 533                         aid.push_back(n);
 534                         hold = aid.size();
 535                         aid.resize(aid.size() + 3);
 536                     }
 537                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 538                     {
 539                         if (hold == -1)
 540                         {
 541                             gmx_incons("Atom naming problem");
 542                         }
 543                         aid[hold] = n;
 544                     }
 545                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 546                     {
 547                         if (hold == -1)
 548                         {
 549                             gmx_incons("Atom naming problem");
 550                         }
 551                         aid[hold + 1] = n;
 552                     }
 553                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 554                     {
 555                         if (hold == -1)
 556                         {
 557                             gmx_incons("Atom naming problem");
 558                         }
 559                         aid[hold + 1] = n;
 560                     }
 561                     else
 562                     {
 563                         aid.push_back(n);
 564                     }
 565                 }
 566             }
 567             /* copy the residuename to the tail of the groupname */
 568             if (!aid.empty())
 569             {
 570                 add_grp(gb, gn, aid, "SwapSC-CO");
 571             }
 572         }
 573     }
 574 }
 575
 576
 577 void analyse(const t_atoms* atoms, t_blocka* gb, char*** gn, gmx_bool bASK, gmx_bool bVerb)
 578 {
 579     char* resnm;
 580     int   i;
 581     int   iwater, iion;
 582     int   nwater, nion;
 583
 584     if (bVerb)
 585     {
 586         printf("Analysing residue names:\n");
 587     }
 588     /* Create system group, every single atom */
 589     std::vector<int> aid(atoms->nr);
 590     for (i = 0; i < atoms->nr; i++)
 591     {
 592         aid[i] = i;
 593     }
 594     add_grp(gb, gn, aid, "System");
 595
 596     /* For every residue, get a pointer to the residue type name */
 597     ResidueType rt;
 598
 599     std::vector<std::string> restype;
 600     std::vector<std::string> previousTypename;
 601     if (atoms->nres > 0)
 602     {
 603         int i = 0;
 604
 605         resnm = *atoms->resinfo[i].name;
 606         restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
 607         previousTypename.push_back(restype.back());
 608
 609         for (i = 1; i < atoms->nres; i++)
 610         {
 611             resnm = *atoms->resinfo[i].name;
 612             restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
 613
 614             /* Note that this does not lead to a N*N loop, but N*K, where
 615              * K is the number of residue _types_, which is small and independent of N.
 616              */
 617             bool found = false;
 618             for (size_t k = 0; k < previousTypename.size() && !found; k++)
 619             {
 620                 found = strcmp(restype.back().c_str(), previousTypename[k].c_str()) == 0;
 621             }
 622             if (!found)
 623             {
 624                 previousTypename.push_back(restype.back());
 625             }
 626         }
 627     }
 628
 629     if (bVerb)
 630     {
 631         p_status(restype, previousTypename);
 632     }
 633
 634     for (gmx::index k = 0; k < gmx::ssize(previousTypename); k++)
 635     {
 636         aid = mk_aid(atoms, restype, previousTypename[k], TRUE);
 637
 638         /* Check for special types to do fancy stuff with */
 639
 640         if (!gmx_strcasecmp(previousTypename[k].c_str(), "Protein") && !aid.empty())
 641         {
 642             /* PROTEIN */
 643             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 644
 645             /* Create a Non-Protein group */
 646             aid = mk_aid(atoms, restype, "Protein", FALSE);
 647             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 648             {
 649                 add_grp(gb, gn, aid, "non-Protein");
 650             }
 651         }
 652         else if (!gmx_strcasecmp(previousTypename[k].c_str(), "Water") && !aid.empty())
 653         {
 654             add_grp(gb, gn, aid, previousTypename[k]);
 655             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 656             add_grp(gb, gn, aid, "SOL");
 657
 658
 659             /* Solvent, create a negated group too */
 660             aid = mk_aid(atoms, restype, "Water", FALSE);
 661             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 662             {
 663                 add_grp(gb, gn, aid, "non-Water");
 664             }
 665         }
 666         else if (!aid.empty())
 667         {
 668             /* Other groups */
 669             add_grp(gb, gn, aid, previousTypename[k]);
 670             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 671         }
 672     }
 673
 674
 675     /* Create a merged water_and_ions group */
 676     iwater = -1;
 677     iion   = -1;
 678     nwater = 0;
 679     nion   = 0;
 680
 681     for (i = 0; i < gb->nr; i++)
 682     {
 683         if (!gmx_strcasecmp((*gn)[i], "Water"))
 684         {
 685             iwater = i;
 686             nwater = gb->index[i + 1] - gb->index[i];
 687         }
 688         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 689         {
 690             iion = i;
 691             nion = gb->index[i + 1] - gb->index[i];
 692         }
 693     }
 694
 695     if (nwater > 0 && nion > 0)
 696     {
 697         srenew(gb->index, gb->nr + 2);
 698         srenew(*gn, gb->nr + 1);
 699         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 700         srenew(gb->a, gb->nra + nwater + nion);
 701         if (nwater > 0)
 702         {
 703             for (i = gb->index[iwater]; i < gb->index[iwater + 1]; i++)
 704             {
 705                 gb->a[gb->nra++] = gb->a[i];
 706             }
 707         }
 708         if (nion > 0)
 709         {
 710             for (i = gb->index[iion]; i < gb->index[iion + 1]; i++)
 711             {
 712                 gb->a[gb->nra++] = gb->a[i];
 713             }
 714         }
 715         gb->nr++;
 716         gb->index[gb->nr] = gb->nra;
 717     }
 718 }
 719
 720
 721 void check_index(const char* gname, int n, int index[], const char* traj, int natoms)
 722 {
 723     int i;
 724
 725     for (i = 0; i < n; i++)
 726     {
 727         if (index[i] >= natoms)
 728         {
 729             gmx_fatal(FARGS,
 730                       "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 731                       gname ? gname : "Index",
 732                       i + 1,
 733                       index[i] + 1,
 734                       traj ? traj : "the trajectory",
 735                       natoms);
 736         }
 737         else if (index[i] < 0)
 738         {
 739             gmx_fatal(FARGS,
 740                       "%s atom number (index[%d]=%d) is less than zero",
 741                       gname ? gname : "Index",
 742                       i + 1,
 743                       index[i] + 1);
 744         }
 745     }
 746 }
 747
 748 t_blocka* init_index(const char* gfile, char*** grpname)
 749 {
 750     FILE*     in;
 751     t_blocka* b;
 752     int       maxentries;
 753     int       i, j;
 754     char      line[STRLEN], *pt, str[STRLEN];
 755
 756     in = gmx_ffopen(gfile, "r");
 757     snew(b, 1);
 758     b->nr      = 0;
 759     b->index   = nullptr;
 760     b->nra     = 0;
 761     b->a       = nullptr;
 762     *grpname   = nullptr;
 763     maxentries = 0;
 764     while (get_a_line(in, line, STRLEN))
 765     {
 766         if (get_header(line, str))
 767         {
 768             b->nr++;
 769             srenew(b->index, b->nr + 1);
 770             srenew(*grpname, b->nr);
 771             if (b->nr == 1)
 772             {
 773                 b->index[0] = 0;
 774             }
 775             b->index[b->nr]       = b->index[b->nr - 1];
 776             (*grpname)[b->nr - 1] = gmx_strdup(str);
 777         }
 778         else
 779         {
 780             if (b->nr == 0)
 781             {
 782                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 783             }
 784             pt = line;
 785             while (sscanf(pt, "%s", str) == 1)
 786             {
 787                 i = b->index[b->nr];
 788                 if (i >= maxentries)
 789                 {
 790                     maxentries += 1024;
 791                     srenew(b->a, maxentries);
 792                 }
 793                 assert(b->a != nullptr); // for clang analyzer
 794                 b->a[i] = strtol(str, nullptr, 10) - 1;
 795                 b->index[b->nr]++;
 796                 (b->nra)++;
 797                 pt = strstr(pt, str) + strlen(str);
 798             }
 799         }
 800     }
 801     gmx_ffclose(in);
 802
 803     for (i = 0; (i < b->nr); i++)
 804     {
 805         assert(b->a != nullptr); // for clang analyzer
 806         for (j = b->index[i]; (j < b->index[i + 1]); j++)
 807         {
 808             if (b->a[j] < 0)
 809             {
 810                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n", b->a[j], (*grpname)[i]);
 811             }
 812         }
 813     }
 814
 815     return b;
 816 }
 817
 818 static void minstring(char* str)
 819 {
 820     int i;
 821
 822     for (i = 0; (i < static_cast<int>(strlen(str))); i++)
 823     {
 824         if (str[i] == '-')
 825         {
 826             str[i] = '_';
 827         }
 828     }
 829 }
 830
 831 int find_group(const char* s, int ngrps, char** grpname)
 832 {
 833     int      aa, i, n;
 834     char     string[STRLEN];
 835     gmx_bool bMultiple;
 836     bMultiple = FALSE;
 837     n         = strlen(s);
 838     aa        = -1;
 839     /* first look for whole name match */
 840     {
 841         for (i = 0; i < ngrps; i++)
 842         {
 843             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 844             {
 845                 if (aa != -1)
 846                 {
 847                     bMultiple = TRUE;
 848                 }
 849                 aa = i;
 850             }
 851         }
 852     }
 853     /* second look for first string match */
 854     if (aa == -1)
 855     {
 856         for (i = 0; i < ngrps; i++)
 857         {
 858             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 859             {
 860                 if (aa != -1)
 861                 {
 862                     bMultiple = TRUE;
 863                 }
 864                 aa = i;
 865             }
 866         }
 867     }
 868     /* last look for arbitrary substring match */
 869     if (aa == -1)
 870     {
 871         char key[STRLEN];
 872         strncpy(key, s, sizeof(key) - 1);
 873         key[STRLEN - 1] = '\0';
 874         upstring(key);
 875         minstring(key);
 876         for (i = 0; i < ngrps; i++)
 877         {
 878             strncpy(string, grpname[i], STRLEN - 1);
 879             upstring(string);
 880             minstring(string);
 881             if (strstr(string, key) != nullptr)
 882             {
 883                 if (aa != -1)
 884                 {
 885                     bMultiple = TRUE;
 886                 }
 887                 aa = i;
 888             }
 889         }
 890     }
 891     if (bMultiple)
 892     {
 893         printf("Error: Multiple groups '%s' selected\n", s);
 894         aa = -1;
 895     }
 896     return aa;
 897 }
 898
 899 static int qgroup(int* a, int ngrps, char** grpname)
 900 {
 901     char     s[STRLEN];
 902     int      aa;
 903     gmx_bool bInRange;
 904     char*    end;
 905
 906     do
 907     {
 908         fprintf(stderr, "Select a group: ");
 909         do
 910         {
 911             if (scanf("%s", s) != 1)
 912             {
 913                 gmx_fatal(FARGS, "Cannot read from input");
 914             }
 915             trim(s); /* remove spaces */
 916         } while (strlen(s) == 0);
 917         aa = strtol(s, &end, 10);
 918         if (aa == 0 && end[0] != '\0') /* string entered */
 919         {
 920             aa = find_group(s, ngrps, grpname);
 921         }
 922         bInRange = (aa >= 0 && aa < ngrps);
 923         if (!bInRange)
 924         {
 925             printf("Error: No such group '%s'\n", s);
 926         }
 927     } while (!bInRange);
 928     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 929     *a = aa;
 930     return aa;
 931 }
 932
 933 static void
 934 rd_groups(t_blocka* grps, char** grpname, char* gnames[], int ngrps, int isize[], int* index[], int grpnr[])
 935 {
 936     int i, j, gnr1;
 937
 938     if (grps->nr == 0)
 939     {
 940         gmx_fatal(FARGS, "Error: no groups in indexfile");
 941     }
 942     for (i = 0; (i < grps->nr); i++)
 943     {
 944         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i], grps->index[i + 1] - grps->index[i]);
 945     }
 946     for (i = 0; (i < ngrps); i++)
 947     {
 948         if (grps->nr > 1)
 949         {
 950             do
 951             {
 952                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 953                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 954                 {
 955                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr - 1);
 956                 }
 957             } while ((gnr1 < 0) || (gnr1 >= grps->nr));
 958         }
 959         else
 960         {
 961             fprintf(stderr, "There is one group in the index\n");
 962             gnr1 = 0;
 963         }
 964         gnames[i] = gmx_strdup(grpname[gnr1]);
 965         isize[i]  = grps->index[gnr1 + 1] - grps->index[gnr1];
 966         snew(index[i], isize[i]);
 967         for (j = 0; (j < isize[i]); j++)
 968         {
 969             index[i][j] = grps->a[grps->index[gnr1] + j];
 970         }
 971     }
 972 }
 973
 974 void rd_index(const char* statfile, int ngrps, int isize[], int* index[], char* grpnames[])
 975 {
 976     char**    gnames;
 977     t_blocka* grps;
 978     int*      grpnr;
 979
 980     snew(grpnr, ngrps);
 981     if (!statfile)
 982     {
 983         gmx_fatal(FARGS, "No index file specified");
 984     }
 985     grps = init_index(statfile, &gnames);
 986     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
 987     for (int i = 0; i < grps->nr; i++)
 988     {
 989         sfree(gnames[i]);
 990     }
 991     sfree(gnames);
 992     sfree(grpnr);
 993     done_blocka(grps);
 994     sfree(grps);
 995 }
 996
 997 void get_index(const t_atoms* atoms, const char* fnm, int ngrps, int isize[], int* index[], char* grpnames[])
 998 {
 999     char***   gnames;
1000     t_blocka* grps = nullptr;
1001     int*      grpnr;
1002
1003     snew(grpnr, ngrps);
1004     snew(gnames, 1);
1005     if (fnm != nullptr)
1006     {
1007         grps = init_index(fnm, gnames);
1008     }
1009     else if (atoms)
1010     {
1011         snew(grps, 1);
1012         snew(grps->index, 1);
1013         analyse(atoms, grps, gnames, FALSE, FALSE);
1014     }
1015     else
1016     {
1017         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1018     }
1019
1020     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1021     for (int i = 0; i < grps->nr; ++i)
1022     {
1023         sfree((*gnames)[i]);
1024     }
1025     sfree(*gnames);
1026     sfree(gnames);
1027     sfree(grpnr);
1028     done_blocka(grps);
1029     sfree(grps);
1030 }
1031
1032 t_cluster_ndx* cluster_index(FILE* fplog, const char* ndx)
1033 {
1034     t_cluster_ndx* c;
1035     int            i;
1036
1037     snew(c, 1);
1038     c->clust    = init_index(ndx, &c->grpname);
1039     c->maxframe = -1;
1040     for (i = 0; (i < c->clust->nra); i++)
1041     {
1042         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1043     }
1044     fprintf(fplog ? fplog : stdout,
1045             "There are %d clusters containing %d structures, highest framenr is %d\n",
1046             c->clust->nr,
1047             c->clust->nra,
1048             c->maxframe);
1049     if (debug)
1050     {
1051         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1052         for (i = 0; (i < c->clust->nra); i++)
1053         {
1054             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1055             {
1056                 gmx_fatal(FARGS,
1057                           "Range check error for c->clust->a[%d] = %d\n"
1058                           "should be within 0 and %d",
1059                           i,
1060                           c->clust->a[i],
1061                           c->maxframe + 1);
1062             }
1063         }
1064     }
1065     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1066
1067     return c;
1068 }