2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/topology/atoms.h"
50 #include "gromacs/topology/block.h"
51 #include "gromacs/topology/invblock.h"
52 #include "gromacs/topology/residuetypes.h"
53 #include "gromacs/utility/arraysize.h"
54 #include "gromacs/utility/cstringutil.h"
55 #include "gromacs/utility/fatalerror.h"
56 #include "gromacs/utility/futil.h"
57 #include "gromacs/utility/smalloc.h"
58 #include "gromacs/utility/strdb.h"
60 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
68 c = toupper(fgetc(stdin));
69 } while ((c != 'Y') && (c != 'N'));
79 void write_index(const char* outf, t_blocka* b, char** gnames, gmx_bool bDuplicate, int natoms)
84 out = gmx_ffopen(outf, "w");
85 /* fprintf(out,"%5d %5d\n",b->nr,b->nra); */
86 for (i = 0; (i < b->nr); i++)
88 fprintf(out, "[ %s ]", gnames[i]);
89 for (k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
91 const char sep = (k % 15 == 0 ? '\n' : ' ');
92 fprintf(out, "%c%4d", sep, b->a[j] + 1);
97 /* Duplicate copy, useful for computational electrophysiology double-layer setups */
100 fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
101 for (i = 0; (i < b->nr); i++)
103 fprintf(out, "[ %s_copy ]", gnames[i]);
104 for (k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
106 const char sep = (k % 15 == 0 ? '\n' : ' ');
107 fprintf(out, "%c%4d", sep, b->a[j] + 1 + natoms);
116 void add_grp(t_blocka* b, char*** gnames, gmx::ArrayRef<const int> a, const std::string& name)
118 srenew(b->index, b->nr + 2);
119 srenew(*gnames, b->nr + 1);
120 (*gnames)[b->nr] = gmx_strdup(name.c_str());
122 srenew(b->a, b->nra + a.size());
123 for (gmx::index i = 0; (i < a.ssize()); i++)
125 b->a[b->nra++] = a[i];
128 b->index[b->nr] = b->nra;
132 * Compare index in groups.
134 * Checks the index in \p a to the one in \p b at \p index.
135 * If \p index is < 0, it is taken as relative to the end of \p b.
137 * \param[in] b Block with groups.
138 * \param[in] a New group to compare to.
139 * \param[in] index The index to check.
140 * \returns True if groups are the same.
142 static bool grp_cmp(t_blocka* b, gmx::ArrayRef<const int> a, int index)
146 index = b->nr - 1 + index;
150 gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
153 if (a.ssize() != b->index[index + 1] - b->index[index])
157 for (gmx::index i = 0; i < a.ssize(); i++)
159 if (a[i] != b->a[b->index[index] + i])
166 //! Print out how many residues of a certain type are present.
167 static void p_status(gmx::ArrayRef<const std::string> restype, gmx::ArrayRef<const std::string> typenames)
169 std::vector<int> counter(typenames.size(), 0);
170 std::transform(typenames.begin(), typenames.end(), counter.begin(), [&restype](const std::string& typenm) {
171 return std::count_if(restype.begin(), restype.end(), [&typenm](const std::string& res) {
172 return gmx_strcasecmp(res.c_str(), typenm.c_str()) == 0;
176 for (int i = 0; (i < typenames.ssize()); i++)
180 printf("There are: %5d %10s residues\n", counter[i], typenames[i].c_str());
186 * Return a new group of atoms with \p restype that match \p typestring,
187 * or not matching if \p bMatch.
189 * \param[in] atoms Atoms data to use.
190 * \param[in] restype Residuetypes to match.
191 * \param[in] typestring Which type the residue should have.
192 * \param[in] bMatch whether to return matching atoms or those that don't.
193 * \returns Vector of atoms that match.
195 static std::vector<int> mk_aid(const t_atoms* atoms,
196 gmx::ArrayRef<const std::string> restype,
197 const std::string& typestring,
201 for (int i = 0; (i < atoms->nr); i++)
203 bool res = gmx_strcasecmp(restype[atoms->atom[i].resind].c_str(), typestring.c_str()) == 0;
224 static void analyse_other(gmx::ArrayRef<std::string> restype,
225 const t_atoms* atoms,
231 restp_t* restp = nullptr;
232 char** attp = nullptr;
233 char * rname, *aname;
234 int i, resind, natp, nrestp = 0;
236 for (i = 0; (i < atoms->nres); i++)
238 if (gmx_strcasecmp(restype[i].c_str(), "Protein")
239 && gmx_strcasecmp(restype[i].c_str(), "DNA") && gmx_strcasecmp(restype[i].c_str(), "RNA")
240 && gmx_strcasecmp(restype[i].c_str(), "Water"))
250 printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into "
253 for (int k = 0; (k < atoms->nr); k++)
255 resind = atoms->atom[k].resind;
256 rname = *atoms->resinfo[resind].name;
257 if (gmx_strcasecmp(restype[resind].c_str(), "Protein")
258 && gmx_strcasecmp(restype[resind].c_str(), "DNA")
259 && gmx_strcasecmp(restype[resind].c_str(), "RNA")
260 && gmx_strcasecmp(restype[resind].c_str(), "Water"))
263 for (l = 0; (l < nrestp); l++)
266 if (strcmp(restp[l].rname, rname) == 0)
273 srenew(restp, nrestp + 1);
274 restp[nrestp].rname = gmx_strdup(rname);
275 restp[nrestp].bNeg = FALSE;
276 restp[nrestp].gname = gmx_strdup(rname);
281 for (int i = 0; (i < nrestp); i++)
283 std::vector<int> aid;
284 for (int j = 0; (j < atoms->nr); j++)
286 rname = *atoms->resinfo[atoms->atom[j].resind].name;
287 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg)
288 || (strcmp(restp[i].rname, rname) != 0 && restp[i].bNeg))
293 add_grp(gb, gn, aid, restp[i].gname);
296 printf("split %s into atoms (y/n) ? ", restp[i].gname);
298 if (gmx_ask_yesno(bASK))
301 for (size_t k = 0; (k < aid.size()); k++)
303 aname = *atoms->atomname[aid[k]];
305 for (l = 0; (l < natp); l++)
307 if (strcmp(aname, attp[l]) == 0)
314 srenew(attp, ++natp);
315 attp[natp - 1] = aname;
320 for (int l = 0; (l < natp); l++)
322 std::vector<int> aaid;
323 for (size_t k = 0; (k < aid.size()); k++)
325 aname = *atoms->atomname[aid[k]];
326 if (strcmp(aname, attp[l]) == 0)
328 aaid.push_back(aid[k]);
331 add_grp(gb, gn, aaid, attp[l]);
338 sfree(restp[i].rname);
339 sfree(restp[i].gname);
346 * Data necessary to construct a single (protein) index group in
349 typedef struct gmx_help_make_index_group // NOLINT(clang-analyzer-optin.performance.Padding)
351 /** The set of atom names that will be used to form this index group */
352 const char** defining_atomnames;
353 /** Size of the defining_atomnames array */
354 int num_defining_atomnames;
355 /** Name of this index group */
356 const char* group_name;
357 /** Whether the above atom names name the atoms in the group, or
358 those not in the group */
359 gmx_bool bTakeComplement;
360 /** The index in wholename gives the first item in the arrays of
361 atomnames that should be tested with 'gmx_strncasecmp' in stead of
362 gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
363 This is comparable to using a '*' wildcard at the end of specific
364 atom names, but that is more involved to implement...
367 /** Only create this index group if it differs from the one specified in compareto,
368 where -1 means to always create this group. */
370 } t_gmx_help_make_index_group;
372 static void analyse_prot(gmx::ArrayRef<const std::string> restype,
373 const t_atoms* atoms,
379 /* lists of atomnames to be used in constructing index groups: */
380 static const char* pnoh[] = { "H", "HN" };
381 static const char* pnodum[] = { "MN1", "MN2", "MCB1", "MCB2", "MCG1", "MCG2",
382 "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
383 static const char* calpha[] = { "CA" };
384 static const char* bb[] = { "N", "CA", "C" };
385 static const char* mc[] = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
386 static const char* mcb[] = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
387 static const char* mch[] = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2",
388 "OT", "OXT", "H1", "H2", "H3", "H", "HN" };
390 static const t_gmx_help_make_index_group constructing_data[] = {
391 { nullptr, 0, "Protein", TRUE, -1, -1 },
392 { pnoh, asize(pnoh), "Protein-H", TRUE, 0, -1 },
393 { calpha, asize(calpha), "C-alpha", FALSE, -1, -1 },
394 { bb, asize(bb), "Backbone", FALSE, -1, -1 },
395 { mc, asize(mc), "MainChain", FALSE, -1, -1 },
396 { mcb, asize(mcb), "MainChain+Cb", FALSE, -1, -1 },
397 { mch, asize(mch), "MainChain+H", FALSE, -1, -1 },
398 { mch, asize(mch), "SideChain", TRUE, -1, -1 },
399 { mch, asize(mch), "SideChain-H", TRUE, 11, -1 },
400 { pnodum, asize(pnodum), "Prot-Masses", TRUE, -1, 0 },
402 const int num_index_groups = asize(constructing_data);
407 char ndx_name[STRLEN], *atnm;
412 printf("Analysing Protein...\n");
414 std::vector<int> aid;
416 /* calculate the number of protein residues */
418 for (i = 0; (i < atoms->nres); i++)
420 if (0 == gmx_strcasecmp(restype[i].c_str(), "Protein"))
425 /* find matching or complement atoms */
426 for (i = 0; (i < num_index_groups); i++)
428 for (n = 0; (n < atoms->nr); n++)
430 if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind].c_str(), "Protein"))
433 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
435 /* skip digits at beginning of atomname, e.g. 1H */
436 atnm = *atoms->atomname[n];
437 while (isdigit(atnm[0]))
441 if ((constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename))
443 if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
451 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],
453 strlen(constructing_data[i].defining_atomnames[j])))
459 if (constructing_data[i].bTakeComplement != match)
465 /* if we want to add this group always or it differs from previous
467 if (-1 == constructing_data[i].compareto || !grp_cmp(gb, aid, constructing_data[i].compareto - i))
469 add_grp(gb, gn, aid, constructing_data[i].group_name);
476 for (i = 0; (i < num_index_groups); i++)
478 printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
479 if (gmx_ask_yesno(bASK))
483 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
485 resind = atoms->atom[n].resind;
486 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
489 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
492 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],
493 *atoms->atomname[n]))
498 if (constructing_data[i].bTakeComplement != match)
503 /* copy the residuename to the tail of the groupname */
507 ri = &atoms->resinfo[resind];
510 constructing_data[i].group_name,
513 ri->ic == ' ' ? '\0' : ri->ic);
514 add_grp(gb, gn, aid, ndx_name);
520 printf("Make group with sidechain and C=O swapped (y/n) ? ");
521 if (gmx_ask_yesno(bASK))
523 /* Make swap sidechain C=O index */
525 for (int n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
527 int resind = atoms->atom[n].resind;
529 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
531 if (strcmp("CA", *atoms->atomname[n]) == 0)
535 aid.resize(aid.size() + 3);
537 else if (strcmp("C", *atoms->atomname[n]) == 0)
541 gmx_incons("Atom naming problem");
545 else if (strcmp("O", *atoms->atomname[n]) == 0)
549 gmx_incons("Atom naming problem");
553 else if (strcmp("O1", *atoms->atomname[n]) == 0)
557 gmx_incons("Atom naming problem");
567 /* copy the residuename to the tail of the groupname */
570 add_grp(gb, gn, aid, "SwapSC-CO");
577 void analyse(const t_atoms* atoms, t_blocka* gb, char*** gn, gmx_bool bASK, gmx_bool bVerb)
586 printf("Analysing residue names:\n");
588 /* Create system group, every single atom */
589 std::vector<int> aid(atoms->nr);
590 for (i = 0; i < atoms->nr; i++)
594 add_grp(gb, gn, aid, "System");
596 /* For every residue, get a pointer to the residue type name */
599 std::vector<std::string> restype;
600 std::vector<std::string> previousTypename;
605 resnm = *atoms->resinfo[i].name;
606 restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
607 previousTypename.push_back(restype.back());
609 for (i = 1; i < atoms->nres; i++)
611 resnm = *atoms->resinfo[i].name;
612 restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
614 /* Note that this does not lead to a N*N loop, but N*K, where
615 * K is the number of residue _types_, which is small and independent of N.
618 for (size_t k = 0; k < previousTypename.size() && !found; k++)
620 found = strcmp(restype.back().c_str(), previousTypename[k].c_str()) == 0;
624 previousTypename.push_back(restype.back());
631 p_status(restype, previousTypename);
634 for (gmx::index k = 0; k < gmx::ssize(previousTypename); k++)
636 aid = mk_aid(atoms, restype, previousTypename[k], TRUE);
638 /* Check for special types to do fancy stuff with */
640 if (!gmx_strcasecmp(previousTypename[k].c_str(), "Protein") && !aid.empty())
643 analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
645 /* Create a Non-Protein group */
646 aid = mk_aid(atoms, restype, "Protein", FALSE);
647 if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
649 add_grp(gb, gn, aid, "non-Protein");
652 else if (!gmx_strcasecmp(previousTypename[k].c_str(), "Water") && !aid.empty())
654 add_grp(gb, gn, aid, previousTypename[k]);
655 /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
656 add_grp(gb, gn, aid, "SOL");
659 /* Solvent, create a negated group too */
660 aid = mk_aid(atoms, restype, "Water", FALSE);
661 if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
663 add_grp(gb, gn, aid, "non-Water");
666 else if (!aid.empty())
669 add_grp(gb, gn, aid, previousTypename[k]);
670 analyse_other(restype, atoms, gb, gn, bASK, bVerb);
675 /* Create a merged water_and_ions group */
681 for (i = 0; i < gb->nr; i++)
683 if (!gmx_strcasecmp((*gn)[i], "Water"))
686 nwater = gb->index[i + 1] - gb->index[i];
688 else if (!gmx_strcasecmp((*gn)[i], "Ion"))
691 nion = gb->index[i + 1] - gb->index[i];
695 if (nwater > 0 && nion > 0)
697 srenew(gb->index, gb->nr + 2);
698 srenew(*gn, gb->nr + 1);
699 (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
700 srenew(gb->a, gb->nra + nwater + nion);
703 for (i = gb->index[iwater]; i < gb->index[iwater + 1]; i++)
705 gb->a[gb->nra++] = gb->a[i];
710 for (i = gb->index[iion]; i < gb->index[iion + 1]; i++)
712 gb->a[gb->nra++] = gb->a[i];
716 gb->index[gb->nr] = gb->nra;
721 void check_index(const char* gname, int n, int index[], const char* traj, int natoms)
725 for (i = 0; i < n; i++)
727 if (index[i] >= natoms)
730 "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
731 gname ? gname : "Index",
734 traj ? traj : "the trajectory",
737 else if (index[i] < 0)
740 "%s atom number (index[%d]=%d) is less than zero",
741 gname ? gname : "Index",
748 t_blocka* init_index(const char* gfile, char*** grpname)
754 char line[STRLEN], *pt, str[STRLEN];
756 in = gmx_ffopen(gfile, "r");
764 while (get_a_line(in, line, STRLEN))
766 if (get_header(line, str))
769 srenew(b->index, b->nr + 1);
770 srenew(*grpname, b->nr);
775 b->index[b->nr] = b->index[b->nr - 1];
776 (*grpname)[b->nr - 1] = gmx_strdup(str);
782 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
785 while (sscanf(pt, "%s", str) == 1)
791 srenew(b->a, maxentries);
793 assert(b->a != nullptr); // for clang analyzer
794 b->a[i] = strtol(str, nullptr, 10) - 1;
797 pt = strstr(pt, str) + strlen(str);
803 for (i = 0; (i < b->nr); i++)
805 assert(b->a != nullptr); // for clang analyzer
806 for (j = b->index[i]; (j < b->index[i + 1]); j++)
810 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n", b->a[j], (*grpname)[i]);
818 static void minstring(char* str)
822 for (i = 0; (i < static_cast<int>(strlen(str))); i++)
831 int find_group(const char* s, int ngrps, char** grpname)
839 /* first look for whole name match */
841 for (i = 0; i < ngrps; i++)
843 if (gmx_strcasecmp_min(s, grpname[i]) == 0)
853 /* second look for first string match */
856 for (i = 0; i < ngrps; i++)
858 if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
868 /* last look for arbitrary substring match */
872 strncpy(key, s, sizeof(key) - 1);
873 key[STRLEN - 1] = '\0';
876 for (i = 0; i < ngrps; i++)
878 strncpy(string, grpname[i], STRLEN - 1);
881 if (strstr(string, key) != nullptr)
893 printf("Error: Multiple groups '%s' selected\n", s);
899 static int qgroup(int* a, int ngrps, char** grpname)
908 fprintf(stderr, "Select a group: ");
911 if (scanf("%s", s) != 1)
913 gmx_fatal(FARGS, "Cannot read from input");
915 trim(s); /* remove spaces */
916 } while (strlen(s) == 0);
917 aa = strtol(s, &end, 10);
918 if (aa == 0 && end[0] != '\0') /* string entered */
920 aa = find_group(s, ngrps, grpname);
922 bInRange = (aa >= 0 && aa < ngrps);
925 printf("Error: No such group '%s'\n", s);
928 printf("Selected %d: '%s'\n", aa, grpname[aa]);
934 rd_groups(t_blocka* grps, char** grpname, char* gnames[], int ngrps, int isize[], int* index[], int grpnr[])
940 gmx_fatal(FARGS, "Error: no groups in indexfile");
942 for (i = 0; (i < grps->nr); i++)
944 fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i], grps->index[i + 1] - grps->index[i]);
946 for (i = 0; (i < ngrps); i++)
952 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
953 if ((gnr1 < 0) || (gnr1 >= grps->nr))
955 fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr - 1);
957 } while ((gnr1 < 0) || (gnr1 >= grps->nr));
961 fprintf(stderr, "There is one group in the index\n");
964 gnames[i] = gmx_strdup(grpname[gnr1]);
965 isize[i] = grps->index[gnr1 + 1] - grps->index[gnr1];
966 snew(index[i], isize[i]);
967 for (j = 0; (j < isize[i]); j++)
969 index[i][j] = grps->a[grps->index[gnr1] + j];
974 void rd_index(const char* statfile, int ngrps, int isize[], int* index[], char* grpnames[])
983 gmx_fatal(FARGS, "No index file specified");
985 grps = init_index(statfile, &gnames);
986 rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
987 for (int i = 0; i < grps->nr; i++)
997 void get_index(const t_atoms* atoms, const char* fnm, int ngrps, int isize[], int* index[], char* grpnames[])
1000 t_blocka* grps = nullptr;
1007 grps = init_index(fnm, gnames);
1012 snew(grps->index, 1);
1013 analyse(atoms, grps, gnames, FALSE, FALSE);
1017 gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1020 rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1021 for (int i = 0; i < grps->nr; ++i)
1023 sfree((*gnames)[i]);
1032 t_cluster_ndx* cluster_index(FILE* fplog, const char* ndx)
1038 c->clust = init_index(ndx, &c->grpname);
1040 for (i = 0; (i < c->clust->nra); i++)
1042 c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1044 fprintf(fplog ? fplog : stdout,
1045 "There are %d clusters containing %d structures, highest framenr is %d\n",
1051 pr_blocka(debug, 0, "clust", c->clust, TRUE);
1052 for (i = 0; (i < c->clust->nra); i++)
1054 if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1057 "Range check error for c->clust->a[%d] = %d\n"
1058 "should be within 0 and %d",
1065 c->inv_clust = make_invblocka(c->clust, c->maxframe);