From 5519db306dea80d234fd75ea81cccd87d323070c Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Thu, 29 Jul 2021 07:57:30 +0200 Subject: [PATCH] Use better data structures in gmx chi This removes part of the dependency of gmx chi histogramming from ResidueType handling, which will permit changes to the latter that will benefit both grompp and pdb2gmx Replaced vectors that needed a stable numerical index to look up a residue name with a map that uses the residue name to do the look up. Removed capabilities of ResidueType that are no longer used. --- src/gromacs/gmxana/dlist.cpp | 4 +- src/gromacs/gmxana/gmx_chi.cpp | 54 +++++++++++++++++---------- src/gromacs/gmxana/gstat.h | 20 +++++----- src/gromacs/topology/residuetypes.cpp | 25 ------------- src/gromacs/topology/residuetypes.h | 16 -------- 5 files changed, 47 insertions(+), 72 deletions(-) diff --git a/src/gromacs/gmxana/dlist.cpp b/src/gromacs/gmxana/dlist.cpp index 6d9fbacc8e..c21f89fb44 100644 --- a/src/gromacs/gmxana/dlist.cpp +++ b/src/gromacs/gmxana/dlist.cpp @@ -227,18 +227,18 @@ std::vector mk_dlist(FILE* log, { nc[6]++; } - dl[nl].index = rt->indexFromResidueName(thisres); /* Prevent use of unknown residues. If one adds a custom residue to * residuetypes.dat but somehow loses it, changes it, or does analysis on * another machine, the residue type will be unknown. */ - if (dl[nl].index == -1) + if (!rt->nameIndexedInResidueTypes(thisres)) { gmx_fatal(FARGS, "Unknown residue %s when searching for residue type.\n" "Maybe you need to add a custom residue in residuetypes.dat.", thisres); } + dl[nl].residueName = thisres; sprintf(dl[nl].name, "%s%d", thisres, ires + r0); nl++; diff --git a/src/gromacs/gmxana/gmx_chi.cpp b/src/gromacs/gmxana/gmx_chi.cpp index a56b31a109..e52b5f92ae 100644 --- a/src/gromacs/gmxana/gmx_chi.cpp +++ b/src/gromacs/gmxana/gmx_chi.cpp @@ -43,7 +43,9 @@ #include #include +#include #include +#include #include #include "gromacs/commandline/pargs.h" @@ -450,7 +452,6 @@ static int reset_em_all(gmx::ArrayRef dlist, int nf, real** dih, static void histogramming(FILE* log, int nbin, - ResidueType* rt, int nf, int maxchi, real** dih, @@ -500,7 +501,6 @@ static void histogramming(FILE* log, char hisfile[256], hhisfile[256], title[256], *ss_str = nullptr; char** leg; - size_t rt_size = rt->numberOfEntries(); if (bSSHisto) { fp = gmx_ffopen(ssdump, "r"); @@ -518,19 +518,35 @@ static void histogramming(FILE* log, gmx_ffclose(fp); } + // Build a list of unique residue names found in the dihedral + // list, so we can loop over those unique names conveniently. The + // names are the same as the residue names found in rt in the + // caller, but ResidueType doesn't yet have a way to loop over its + // contents. + std::unordered_set uniqueResidueNames; + for (const auto& dihedral : dlist) + { + uniqueResidueNames.emplace(dihedral.residueName); + } // Build the lookup tables for data relating to the all dihedrals // from each unique residue name represented in the dihedral list. - std::array>>, 3> his_aa_ss; - if (bSSHisto) + std::array>>, 3> his_aa_ss; + std::vector>> his_aa(edMax); + for (const auto& residueName : uniqueResidueNames) { - for (auto& secondaryStructure : his_aa_ss) + if (bSSHisto) + { + for (auto& secondaryStructure : his_aa_ss) + { + secondaryStructure[residueName] = + std::vector>(edMax, std::vector(nbin, 0)); + } + } + for (auto& dihedraltype : his_aa) { - // Construct the vector nest - secondaryStructure = { rt_size + 1, { edMax, std::vector(nbin, 0) } }; + dihedraltype[residueName] = std::vector(nbin, 0); } } - std::vector>> his_aa = { edMax, - { rt_size + 1, std::vector(nbin, 0) } }; snew(histmp, nbin); snew(Jc, dlist.size()); @@ -577,9 +593,9 @@ static void histogramming(FILE* log, */ switch (ss_str[dihedral.resnr]) { - case 'E': his_aa_ss[0][dihedral.index][Dih][hindex]++; break; - case 'H': his_aa_ss[1][dihedral.index][Dih][hindex]++; break; - default: his_aa_ss[2][dihedral.index][Dih][hindex]++; break; + case 'E': his_aa_ss[0][dihedral.residueName][Dih][hindex]++; break; + case 'H': his_aa_ss[1][dihedral.residueName][Dih][hindex]++; break; + default: his_aa_ss[2][dihedral.residueName][Dih][hindex]++; break; } } else if (debug) @@ -629,7 +645,7 @@ static void histogramming(FILE* log, /* Sum distribution per amino acid type as well */ for (int k = 0; (k < nbin); k++) { - his_aa[Dih][dihedral.index][k] += histmp[k]; + his_aa[Dih][dihedral.residueName][k] += histmp[k]; histmp[k] = 0; } j++; @@ -726,7 +742,7 @@ static void histogramming(FILE* log, /* finished -jc stuff */ std::vector normhisto(nbin); - for (size_t i = 0; (i < rt_size); i++) + for (const auto& residueName : uniqueResidueNames) { for (int Dih = 0; (Dih < edMax); Dih++) { @@ -734,7 +750,7 @@ static void histogramming(FILE* log, int j; for (j = 0; (j < nbin); j++) { - if (his_aa[Dih][i][j] != 0) + if (his_aa[Dih][residueName][j] != 0) { break; } @@ -745,10 +761,9 @@ static void histogramming(FILE* log, { if (bNormalize) { - normalize_histo(his_aa[Dih][i], (360.0 / nbin), normhisto); + normalize_histo(his_aa[Dih][residueName], (360.0 / nbin), normhisto); } - std::string residueName = rt->nameFromResidueIndex(i); switch (Dih) { case edPhi: @@ -808,13 +823,13 @@ static void histogramming(FILE* log, } else { - fprintf(fp, "%5d %10d\n", angle, his_aa[Dih][i][j]); + fprintf(fp, "%5d %10d\n", angle, his_aa[Dih][residueName][j]); } if (bSSHisto) { for (int k = 0; (k < 3); k++) { - fprintf(ssfp[k], "%5d %10d\n", angle, his_aa_ss[k][i][Dih][j]); + fprintf(ssfp[k], "%5d %10d\n", angle, his_aa_ss[k][residueName][Dih][j]); } } } @@ -1573,7 +1588,6 @@ int gmx_chi(int argc, char* argv[]) /* Histogramming & J coupling constants & calc of S2 order params */ histogramming(log, nbin, - &rt, nf, maxchi, dih, diff --git a/src/gromacs/gmxana/gstat.h b/src/gromacs/gmxana/gstat.h index 9366190403..90202867f1 100644 --- a/src/gromacs/gmxana/gstat.h +++ b/src/gromacs/gmxana/gstat.h @@ -38,6 +38,8 @@ #ifndef GMX_GMXANA_GSTAT_H #define GMX_GMXANA_GSTAT_H +#include + #include "gromacs/commandline/pargs.h" #include "gromacs/topology/index.h" @@ -77,15 +79,15 @@ typedef struct struct t_dlist { - char name[12]; - int resnr; - int index; /* Index for amino acids (histograms) */ - int j0[edMax]; /* Index in dih array (phi angle is first...) */ - t_dihatms atm; - int b[edMax]; - int ntr[edMax]; - real S2[edMax]; - real rot_occ[edMax][NROT]; + char name[12]; + int resnr; + std::string residueName; + int j0[edMax]; /* Index in dih array (phi angle is first...) */ + t_dihatms atm; + int b[edMax]; + int ntr[edMax]; + real S2[edMax]; + real rot_occ[edMax][NROT]; }; typedef struct diff --git a/src/gromacs/topology/residuetypes.cpp b/src/gromacs/topology/residuetypes.cpp index 02a5ff6f17..8feb1115be 100644 --- a/src/gromacs/topology/residuetypes.cpp +++ b/src/gromacs/topology/residuetypes.cpp @@ -41,7 +41,6 @@ #include #include -#include #include #include @@ -151,30 +150,6 @@ bool ResidueType::namedResidueHasType(const std::string& residueName, const std: return foundIt ? gmx::equalCaseInsensitive(residueType, (*foundIt)->residueType) : false; } -int ResidueType::numberOfEntries() const -{ - return impl_->entry.size(); -} - -int ResidueType::indexFromResidueName(const std::string& residueName) const -{ - gmx::ArrayRef temp(impl_->entry); - auto foundIt = findResidueEntryWithName(temp, residueName); - return foundIt ? std::distance(temp.begin(), *foundIt) : -1; -} - -std::string ResidueType::nameFromResidueIndex(int index) const -{ - if (index >= 0 && index < gmx::ssize(impl_->entry)) - { - return impl_->entry[index].residueName; - } - else - { - return ""; - } -} - std::string ResidueType::typeOfNamedDatabaseResidue(const std::string& residueName) { auto foundIt = findResidueEntryWithName(impl_->entry, residueName); diff --git a/src/gromacs/topology/residuetypes.h b/src/gromacs/topology/residuetypes.h index 7cd5beba12..7c5f60aa94 100644 --- a/src/gromacs/topology/residuetypes.h +++ b/src/gromacs/topology/residuetypes.h @@ -54,8 +54,6 @@ public: //! Get handle to underlying residue type data. ResidueTypeEntry* ResidueTypes(); - //! Get number of entries in ResidueTypes. - int numberOfEntries() const; /*! \brief * Return true if residue \p residueName is found or false otherwise. * @@ -78,20 +76,6 @@ public: * \returns If the check was successful. */ bool namedResidueHasType(const std::string& residueName, const std::string& residueType); - /*! \brief - * Get index to entry in ResidueTypes with name \p residueName. - * - * \param[in] residueName Name of the residue being searched. - * \returns The index or -1 if not found. - */ - int indexFromResidueName(const std::string& residueName) const; - /*! \brief - * Get the name of the entry in ResidueTypes with \p index. - * - * \param[in] index Which entry should be returned. - * \returns The name of the entry at \p index, or nullptr. - */ - std::string nameFromResidueIndex(int index) const; /*! \brief * Return the residue type if a residue with that name exists, or "Other" * -- 2.22.0