Use better data structures in gmx chi

author Mark Abraham <mark.j.abraham@gmail.com>

Thu, 29 Jul 2021 05:57:30 +0000 (07:57 +0200)

committer Mark Abraham <mark.j.abraham@gmail.com>

Sat, 21 Aug 2021 05:22:51 +0000 (07:22 +0200)
author Mark Abraham <mark.j.abraham@gmail.com>
Thu, 29 Jul 2021 05:57:30 +0000 (07:57 +0200)
committer Mark Abraham <mark.j.abraham@gmail.com>
Sat, 21 Aug 2021 05:22:51 +0000 (07:22 +0200)
diff --git a/src/gromacs/gmxana/dlist.cpp b/src/gromacs/gmxana/dlist.cpp

index 6d9fbacc8e3b85e4e45aedd0f684fa7426783a46..c21f89fb443096f7a2fc61306d48c41c79ec50b1 100644 (file)
--- a/src/gromacs/gmxana/dlist.cpp
+++ b/src/gromacs/gmxana/dlist.cpp
@@ -227,18 +227,18 @@ std::vector<t_dlist> mk_dlist(FILE*          log,
              {
                  nc[6]++;
              }
-            dl[nl].index = rt->indexFromResidueName(thisres);
  
              /* Prevent use of unknown residues. If one adds a custom residue to
               * residuetypes.dat but somehow loses it, changes it, or does analysis on
               * another machine, the residue type will be unknown. */
-            if (dl[nl].index == -1)
+            if (!rt->nameIndexedInResidueTypes(thisres))
              {
                  gmx_fatal(FARGS,
                            "Unknown residue %s when searching for residue type.\n"
                            "Maybe you need to add a custom residue in residuetypes.dat.",
                            thisres);
              }
+            dl[nl].residueName = thisres;
  
              sprintf(dl[nl].name, "%s%d", thisres, ires + r0);
              nl++;
diff --git a/src/gromacs/gmxana/gmx_chi.cpp b/src/gromacs/gmxana/gmx_chi.cpp

index a56b31a10937c97d2824f467603487fb7d923239..e52b5f92ae7df45c03afd9822699545c547a97ca 100644 (file)
--- a/src/gromacs/gmxana/gmx_chi.cpp
+++ b/src/gromacs/gmxana/gmx_chi.cpp
@@ -43,7 +43,9 @@
  
  #include <algorithm>
  #include <array>
+#include <map>
  #include <string>
+#include <unordered_set>
  #include <vector>
  
  #include "gromacs/commandline/pargs.h"
@@ -450,7 +452,6 @@ static int reset_em_all(gmx::ArrayRef<const t_dlist> dlist, int nf, real** dih,
  
  static void histogramming(FILE*                    log,
                            int                      nbin,
-                          ResidueType*             rt,
                            int                      nf,
                            int                      maxchi,
                            real**                   dih,
@@ -500,7 +501,6 @@ static void histogramming(FILE*                    log,
      char        hisfile[256], hhisfile[256], title[256], *ss_str = nullptr;
      char**      leg;
  
-    size_t rt_size = rt->numberOfEntries();
      if (bSSHisto)
      {
          fp = gmx_ffopen(ssdump, "r");
@@ -518,19 +518,35 @@ static void histogramming(FILE*                    log,
          gmx_ffclose(fp);
      }
  
+    // Build a list of unique residue names found in the dihedral
+    // list, so we can loop over those unique names conveniently. The
+    // names are the same as the residue names found in rt in the
+    // caller, but ResidueType doesn't yet have a way to loop over its
+    // contents.
+    std::unordered_set<std::string> uniqueResidueNames;
+    for (const auto& dihedral : dlist)
+    {
+        uniqueResidueNames.emplace(dihedral.residueName);
+    }
      // Build the lookup tables for data relating to the all dihedrals
      // from each unique residue name represented in the dihedral list.
-    std::array<std::vector<std::vector<std::vector<int>>>, 3> his_aa_ss;
-    if (bSSHisto)
+    std::array<std::map<std::string, std::vector<std::vector<int>>>, 3> his_aa_ss;
+    std::vector<std::map<std::string, std::vector<int>>>                his_aa(edMax);
+    for (const auto& residueName : uniqueResidueNames)
      {
-        for (auto& secondaryStructure : his_aa_ss)
+        if (bSSHisto)
+        {
+            for (auto& secondaryStructure : his_aa_ss)
+            {
+                secondaryStructure[residueName] =
+                        std::vector<std::vector<int>>(edMax, std::vector<int>(nbin, 0));
+            }
+        }
+        for (auto& dihedraltype : his_aa)
          {
-            // Construct the vector nest
-            secondaryStructure = { rt_size + 1, { edMax, std::vector<int>(nbin, 0) } };
+            dihedraltype[residueName] = std::vector<int>(nbin, 0);
          }
      }
-    std::vector<std::vector<std::vector<int>>> his_aa = { edMax,
-                                                          { rt_size + 1, std::vector<int>(nbin, 0) } };
      snew(histmp, nbin);
  
      snew(Jc, dlist.size());
@@ -577,9 +593,9 @@ static void histogramming(FILE*                    log,
                           */
                          switch (ss_str[dihedral.resnr])
                          {
-                            case 'E': his_aa_ss[0][dihedral.index][Dih][hindex]++; break;
-                            case 'H': his_aa_ss[1][dihedral.index][Dih][hindex]++; break;
-                            default: his_aa_ss[2][dihedral.index][Dih][hindex]++; break;
+                            case 'E': his_aa_ss[0][dihedral.residueName][Dih][hindex]++; break;
+                            case 'H': his_aa_ss[1][dihedral.residueName][Dih][hindex]++; break;
+                            default: his_aa_ss[2][dihedral.residueName][Dih][hindex]++; break;
                          }
                      }
                      else if (debug)
@@ -629,7 +645,7 @@ static void histogramming(FILE*                    log,
                  /* Sum distribution per amino acid type as well */
                  for (int k = 0; (k < nbin); k++)
                  {
-                    his_aa[Dih][dihedral.index][k] += histmp[k];
+                    his_aa[Dih][dihedral.residueName][k] += histmp[k];
                      histmp[k] = 0;
                  }
                  j++;
@@ -726,7 +742,7 @@ static void histogramming(FILE*                    log,
      /* finished -jc stuff */
  
      std::vector<real> normhisto(nbin);
-    for (size_t i = 0; (i < rt_size); i++)
+    for (const auto& residueName : uniqueResidueNames)
      {
          for (int Dih = 0; (Dih < edMax); Dih++)
          {
@@ -734,7 +750,7 @@ static void histogramming(FILE*                    log,
              int j;
              for (j = 0; (j < nbin); j++)
              {
-                if (his_aa[Dih][i][j] != 0)
+                if (his_aa[Dih][residueName][j] != 0)
                  {
                      break;
                  }
@@ -745,10 +761,9 @@ static void histogramming(FILE*                    log,
              {
                  if (bNormalize)
                  {
-                    normalize_histo(his_aa[Dih][i], (360.0 / nbin), normhisto);
+                    normalize_histo(his_aa[Dih][residueName], (360.0 / nbin), normhisto);
                  }
  
-                std::string residueName = rt->nameFromResidueIndex(i);
                  switch (Dih)
                  {
                      case edPhi:
@@ -808,13 +823,13 @@ static void histogramming(FILE*                    log,
                      }
                      else
                      {
-                        fprintf(fp, "%5d  %10d\n", angle, his_aa[Dih][i][j]);
+                        fprintf(fp, "%5d  %10d\n", angle, his_aa[Dih][residueName][j]);
                      }
                      if (bSSHisto)
                      {
                          for (int k = 0; (k < 3); k++)
                          {
-                            fprintf(ssfp[k], "%5d  %10d\n", angle, his_aa_ss[k][i][Dih][j]);
+                            fprintf(ssfp[k], "%5d  %10d\n", angle, his_aa_ss[k][residueName][Dih][j]);
                          }
                      }
                  }
@@ -1573,7 +1588,6 @@ int gmx_chi(int argc, char* argv[])
      /* Histogramming & J coupling constants & calc of S2 order params */
      histogramming(log,
                    nbin,
-                  &rt,
                    nf,
                    maxchi,
                    dih,
diff --git a/src/gromacs/gmxana/gstat.h b/src/gromacs/gmxana/gstat.h

index 9366190403935b08dfabdbdad3e9db6c5569dea7..90202867f166a693edbf2c570b95695e0ec4f36b 100644 (file)
--- a/src/gromacs/gmxana/gstat.h
+++ b/src/gromacs/gmxana/gstat.h
@@ -38,6 +38,8 @@
  #ifndef GMX_GMXANA_GSTAT_H
  #define GMX_GMXANA_GSTAT_H
  
+#include <string>
+
  #include "gromacs/commandline/pargs.h"
  #include "gromacs/topology/index.h"
  
@@ -77,15 +79,15 @@ typedef struct
  
  struct t_dlist
  {
-    char      name[12];
-    int       resnr;
-    int       index;     /* Index for amino acids (histograms) */
-    int       j0[edMax]; /* Index in dih array (phi angle is first...) */
-    t_dihatms atm;
-    int       b[edMax];
-    int       ntr[edMax];
-    real      S2[edMax];
-    real      rot_occ[edMax][NROT];
+    char        name[12];
+    int         resnr;
+    std::string residueName;
+    int         j0[edMax]; /* Index in dih array (phi angle is first...) */
+    t_dihatms   atm;
+    int         b[edMax];
+    int         ntr[edMax];
+    real        S2[edMax];
+    real        rot_occ[edMax][NROT];
  };
  
  typedef struct
diff --git a/src/gromacs/topology/residuetypes.cpp b/src/gromacs/topology/residuetypes.cpp

index 02a5ff6f174e7e338aa9f93e07ac701140edf485..8feb1115bebf62c6f8d6ccdec5037a2ad2510f71 100644 (file)
--- a/src/gromacs/topology/residuetypes.cpp
+++ b/src/gromacs/topology/residuetypes.cpp
@@ -41,7 +41,6 @@
  #include <cstdio>
  
  #include <algorithm>
-#include <iterator>
  #include <optional>
  #include <string>
  
@@ -151,30 +150,6 @@ bool ResidueType::namedResidueHasType(const std::string& residueName, const std:
      return foundIt ? gmx::equalCaseInsensitive(residueType, (*foundIt)->residueType) : false;
  }
  
-int ResidueType::numberOfEntries() const
-{
-    return impl_->entry.size();
-}
-
-int ResidueType::indexFromResidueName(const std::string& residueName) const
-{
-    gmx::ArrayRef<const ResidueTypeEntry> temp(impl_->entry);
-    auto                                  foundIt = findResidueEntryWithName(temp, residueName);
-    return foundIt ? std::distance(temp.begin(), *foundIt) : -1;
-}
-
-std::string ResidueType::nameFromResidueIndex(int index) const
-{
-    if (index >= 0 && index < gmx::ssize(impl_->entry))
-    {
-        return impl_->entry[index].residueName;
-    }
-    else
-    {
-        return "";
-    }
-}
-
  std::string ResidueType::typeOfNamedDatabaseResidue(const std::string& residueName)
  {
      auto foundIt = findResidueEntryWithName(impl_->entry, residueName);
diff --git a/src/gromacs/topology/residuetypes.h b/src/gromacs/topology/residuetypes.h

index 7cd5beba12ff4d834ac8f7b22643264e8d604523..7c5f60aa94503f2e87d90ae8e7e0cef426e7c330 100644 (file)
--- a/src/gromacs/topology/residuetypes.h
+++ b/src/gromacs/topology/residuetypes.h
@@ -54,8 +54,6 @@ public:
      //! Get handle to underlying residue type data.
      ResidueTypeEntry* ResidueTypes();
  
-    //! Get number of entries in ResidueTypes.
-    int numberOfEntries() const;
      /*! \brief
       * Return true if residue \p residueName is found or false otherwise.
       *
@@ -78,20 +76,6 @@ public:
       * \returns If the check was successful.
       */
      bool namedResidueHasType(const std::string& residueName, const std::string& residueType);
-    /*! \brief
-     * Get index to entry in ResidueTypes with name \p residueName.
-     *
-     * \param[in] residueName Name of the residue being searched.
-     * \returns The index or -1 if not found.
-     */
-    int indexFromResidueName(const std::string& residueName) const;
-    /*! \brief
-     * Get the name of the entry in ResidueTypes with \p index.
-     *
-     * \param[in] index Which entry should be returned.
-     * \returns The name of the entry at \p index, or nullptr.
-     */
-    std::string nameFromResidueIndex(int index) const;
      /*! \brief
       * Return the residue type if a residue with that name exists, or "Other"
       *
author	Mark Abraham <mark.j.abraham@gmail.com>
	Thu, 29 Jul 2021 05:57:30 +0000 (07:57 +0200)
committer	Mark Abraham <mark.j.abraham@gmail.com>
	Sat, 21 Aug 2021 05:22:51 +0000 (07:22 +0200)
src/gromacs/gmxana/dlist.cpp		patch \| blob \| history
src/gromacs/gmxana/gmx_chi.cpp		patch \| blob \| history
src/gromacs/gmxana/gstat.h		patch \| blob \| history
src/gromacs/topology/residuetypes.cpp		patch \| blob \| history
src/gromacs/topology/residuetypes.h		patch \| blob \| history