From 6b81dbff472791cc5defd7667cd80ce1512d0dee Mon Sep 17 00:00:00 2001 From: Mark Abraham Date: Tue, 7 Sep 2021 05:22:49 +0000 Subject: [PATCH] Replace ResidueTypeMap with std::unordered_map --- src/gromacs/fileio/pdbio.cpp | 2 - src/gromacs/gmxana/dlist.cpp | 21 +++-- src/gromacs/gmxana/gmx_chi.cpp | 7 +- src/gromacs/gmxana/gstat.h | 28 +++--- src/gromacs/gmxpreprocess/gen_vsite.cpp | 28 +++--- src/gromacs/gmxpreprocess/pdb2gmx.cpp | 108 +++++++++++++----------- src/gromacs/gmxpreprocess/pdb2top.cpp | 8 +- src/gromacs/gmxpreprocess/xlate.cpp | 12 +-- src/gromacs/gmxpreprocess/xlate.h | 5 +- src/gromacs/topology/atomprop.cpp | 43 +++++----- src/gromacs/topology/atomprop.h | 4 - src/gromacs/topology/index.cpp | 6 +- src/gromacs/topology/residuetypes.cpp | 86 ++++++------------- src/gromacs/topology/residuetypes.h | 96 ++++++++++----------- src/gromacs/utility/stringutil.h | 10 +++ 15 files changed, 226 insertions(+), 238 deletions(-) diff --git a/src/gromacs/fileio/pdbio.cpp b/src/gromacs/fileio/pdbio.cpp index 670d960f0b..07de14f92c 100644 --- a/src/gromacs/fileio/pdbio.cpp +++ b/src/gromacs/fileio/pdbio.cpp @@ -54,7 +54,6 @@ #include "gromacs/pbcutil/pbc.h" #include "gromacs/topology/atomprop.h" #include "gromacs/topology/ifunc.h" -#include "gromacs/topology/residuetypes.h" #include "gromacs/topology/symtab.h" #include "gromacs/topology/topology.h" #include "gromacs/utility/coolstuff.h" @@ -330,7 +329,6 @@ void write_pdbfile_indexed(FILE* out, fprintf(out, "MODEL %8d\n", model_nr > 0 ? model_nr : 1); - ResidueTypeMap rt; for (int ii = 0; ii < nindex; ii++) { int i = index[ii]; diff --git a/src/gromacs/gmxana/dlist.cpp b/src/gromacs/gmxana/dlist.cpp index acdfcd0df8..c6536b8aa5 100644 --- a/src/gromacs/gmxana/dlist.cpp +++ b/src/gromacs/gmxana/dlist.cpp @@ -43,19 +43,18 @@ #include #include "gromacs/gmxana/gstat.h" -#include "gromacs/topology/residuetypes.h" #include "gromacs/topology/topology.h" #include "gromacs/utility/fatalerror.h" -std::vector mk_dlist(FILE* log, - const t_atoms* atoms, - gmx_bool bPhi, - gmx_bool bPsi, - gmx_bool bChi, - gmx_bool bHChi, - int maxchi, - int r0, - ResidueTypeMap* rt) +std::vector mk_dlist(FILE* log, + const t_atoms* atoms, + gmx_bool bPhi, + gmx_bool bPsi, + gmx_bool bChi, + gmx_bool bHChi, + int maxchi, + int r0, + const ResidueTypeMap& residueTypeMap) { int i, j, ii; t_dihatms atm, prev; @@ -231,7 +230,7 @@ std::vector mk_dlist(FILE* log, /* Prevent use of unknown residues. If one adds a custom residue to * residuetypes.dat but somehow loses it, changes it, or does analysis on * another machine, the residue type will be unknown. */ - if (!rt->nameIndexedInResidueTypeMap(thisres)) + if (residueTypeMap.find(thisres) == residueTypeMap.end()) { gmx_fatal(FARGS, "Unknown residue %s when searching for residue type.\n" diff --git a/src/gromacs/gmxana/gmx_chi.cpp b/src/gromacs/gmxana/gmx_chi.cpp index f4359c9cb6..0ca1075c6e 100644 --- a/src/gromacs/gmxana/gmx_chi.cpp +++ b/src/gromacs/gmxana/gmx_chi.cpp @@ -520,7 +520,7 @@ static void histogramming(FILE* log, // Build a list of unique residue names found in the dihedral // list, so we can loop over those unique names conveniently. The - // names are the same as the residue names found in rt in the + // names are the same as the residue names found in residueTypeMap in the // caller, but ResidueTypeMap doesn't yet have a way to loop over its // contents. std::unordered_set uniqueResidueNames; @@ -1530,8 +1530,9 @@ int gmx_chi(int argc, char* argv[]) } fprintf(log, "Title: %s\n", name); - ResidueTypeMap rt; - std::vector dlist = mk_dlist(log, &atoms, bPhi, bPsi, bChi, bHChi, maxchi, r0, &rt); + ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat"); + std::vector dlist = + mk_dlist(log, &atoms, bPhi, bPsi, bChi, bHChi, maxchi, r0, residueTypeMap); fprintf(stderr, "%zu residues with dihedrals found\n", dlist.size()); if (dlist.empty()) diff --git a/src/gromacs/gmxana/gstat.h b/src/gromacs/gmxana/gstat.h index 0671400c92..3207566063 100644 --- a/src/gromacs/gmxana/gstat.h +++ b/src/gromacs/gmxana/gstat.h @@ -42,9 +42,9 @@ #include "gromacs/commandline/pargs.h" #include "gromacs/topology/index.h" +#include "gromacs/topology/residuetypes.h" struct gmx_output_env_t; -class ResidueTypeMap; /* must correspond with 'leg' g_chi.c:727 */ enum @@ -247,15 +247,23 @@ void do_pp2shifts(FILE* fp, int nframes, gmx::ArrayRef dlist, rea gmx_bool has_dihedral(int Dih, const t_dlist& dlist); -std::vector mk_dlist(FILE* log, - const t_atoms* atoms, - gmx_bool bPhi, - gmx_bool bPsi, - gmx_bool bChi, - gmx_bool bHChi, - int maxchi, - int r0, - ResidueTypeMap* rt); +/*! \brief Describe the dihedrals in the residues of the \c atoms + * structure + * + * Return a vector with a t_dlist entry for each residue in \c + * atoms. The entry for a residue contains its name, its index within + * the residues, and a mapping from chemical peptide atom names to + * atom indices based on the atom names. Many fields of t_dlist are + * not yet filled. */ +std::vector mk_dlist(FILE* log, + const t_atoms* atoms, + gmx_bool bPhi, + gmx_bool bPsi, + gmx_bool bChi, + gmx_bool bHChi, + int maxchi, + int r0, + const ResidueTypeMap& rt); void pr_dlist(FILE* fp, gmx::ArrayRef dlist, diff --git a/src/gromacs/gmxpreprocess/gen_vsite.cpp b/src/gromacs/gmxpreprocess/gen_vsite.cpp index 4629996ad8..6487348053 100644 --- a/src/gromacs/gmxpreprocess/gen_vsite.cpp +++ b/src/gromacs/gmxpreprocess/gen_vsite.cpp @@ -578,7 +578,10 @@ print_bonds(FILE* fp, int o2n[], int nrHatoms, const int Hatoms[], int Heavy, in fprintf(fp, "\n"); } -static int get_atype(int atom, t_atoms* at, gmx::ArrayRef rtpFFDB, ResidueTypeMap* rt) +static int get_atype(int atom, + t_atoms* at, + gmx::ArrayRef rtpFFDB, + const ResidueTypeMap& residueTypeMap) { int type; bool bNterm; @@ -591,7 +594,8 @@ static int get_atype(int atom, t_atoms* at, gmx::ArrayRefresinfo[at->atom[atom].resind].name), rtpFFDB); - bNterm = rt->namedResidueHasType(*(at->resinfo[at->atom[atom].resind].name), "Protein") + bNterm = namedResidueHasType( + residueTypeMap, *(at->resinfo[at->atom[atom].resind].name), "Protein") && (at->atom[atom].resind == 0); int j = search_jtype(*localPpResidue, *(at->atomname[atom]), bNterm); type = localPpResidue->atom[j].type; @@ -610,7 +614,10 @@ static int vsite_nm2type(const char* name, PreprocessingAtomTypes* atype) return *tp; } -static real get_amass(int atom, t_atoms* at, gmx::ArrayRef rtpFFDB, ResidueTypeMap* rt) +static real get_amass(int atom, + t_atoms* at, + gmx::ArrayRef rtpFFDB, + const ResidueTypeMap& residueTypeMap) { real mass; bool bNterm; @@ -623,7 +630,8 @@ static real get_amass(int atom, t_atoms* at, gmx::ArrayRefresinfo[at->atom[atom].resind].name), rtpFFDB); - bNterm = rt->namedResidueHasType(*(at->resinfo[at->atom[atom].resind].name), "Protein") + bNterm = namedResidueHasType( + residueTypeMap, *(at->resinfo[at->atom[atom].resind].name), "Protein") && (at->atom[atom].resind == 0); int j = search_jtype(*localPpResidue, *(at->atomname[atom]), bNterm); mass = localPpResidue->atom[j].m; @@ -1799,7 +1807,7 @@ void do_vsites(gmx::ArrayRef rtpFFDB, /* make index to tell which residues were already processed */ std::vector bResProcessed(at->nres); - ResidueTypeMap rt; + ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat"); /* generate vsite constructions */ /* loop over all atoms */ @@ -1818,7 +1826,7 @@ void do_vsites(gmx::ArrayRef rtpFFDB, * N-terminus that must be treated first. */ if (bVsiteAromatics && (strcmp(*(at->atomname[i]), "CA") == 0) && !bResProcessed[resind] - && rt.namedResidueHasType(*(at->resinfo[resind].name), "Protein")) + && namedResidueHasType(residueTypeMap, *(at->resinfo[resind].name), "Protein")) { /* mark this residue */ bResProcessed[resind] = TRUE; @@ -1960,7 +1968,7 @@ void do_vsites(gmx::ArrayRef rtpFFDB, and return H atom numbers (Hatoms) and heavy atom numbers (heavies) */ count_bonds(i, &plist[F_BONDS], at->atomname, &nrbonds, &nrHatoms, Hatoms, &Heavy, &nrheavies, heavies); /* get Heavy atom type */ - tpHeavy = get_atype(Heavy, at, rtpFFDB, &rt); + tpHeavy = get_atype(Heavy, at, rtpFFDB, residueTypeMap); strcpy(tpname, *atype->atomNameFromAtomType(tpHeavy)); bWARNING = FALSE; @@ -2059,7 +2067,7 @@ void do_vsites(gmx::ArrayRef rtpFFDB, /* get dummy mass type from first char of heavy atom type (N or C) */ strcpy(nexttpname, - *atype->atomNameFromAtomType(get_atype(heavies[0], at, rtpFFDB, &rt))); + *atype->atomNameFromAtomType(get_atype(heavies[0], at, rtpFFDB, residueTypeMap))); std::string ch = get_dummymass_name(vsiteconflist, tpname, nexttpname); std::string name; if (ch.empty()) @@ -2118,10 +2126,10 @@ void do_vsites(gmx::ArrayRef rtpFFDB, /* get atom masses, and set Heavy and Hatoms mass to zero */ for (int j = 0; j < nrHatoms; j++) { - mHtot += get_amass(Hatoms[j], at, rtpFFDB, &rt); + mHtot += get_amass(Hatoms[j], at, rtpFFDB, residueTypeMap); at->atom[Hatoms[j]].m = at->atom[Hatoms[j]].mB = 0; } - mtot = mHtot + get_amass(Heavy, at, rtpFFDB, &rt); + mtot = mHtot + get_amass(Heavy, at, rtpFFDB, residueTypeMap); at->atom[Heavy].m = at->atom[Heavy].mB = 0; if (mHmult != 1.0) { diff --git a/src/gromacs/gmxpreprocess/pdb2gmx.cpp b/src/gromacs/gmxpreprocess/pdb2gmx.cpp index 067a1b8683..1454634b99 100644 --- a/src/gromacs/gmxpreprocess/pdb2gmx.cpp +++ b/src/gromacs/gmxpreprocess/pdb2gmx.cpp @@ -647,20 +647,20 @@ void write_posres(const char* fn, t_atoms* pdba, real fc) gmx_fio_fclose(fp); } -int read_pdball(const char* inf, - bool bOutput, - const char* outf, - char** title, - t_atoms* atoms, - rvec** x, - PbcType* pbcType, - matrix box, - bool bRemoveH, - t_symtab* symtab, - ResidueTypeMap* rt, - const char* watres, - AtomProperties* aps, - bool bVerbose) +int read_pdball(const char* inf, + bool bOutput, + const char* outf, + char** title, + t_atoms* atoms, + rvec** x, + PbcType* pbcType, + matrix box, + bool bRemoveH, + t_symtab* symtab, + const ResidueTypeMap& residueTypeMap, + const char* watres, + AtomProperties* aps, + bool bVerbose) /* Read a pdb file. (containing proteins) */ { int natom, new_natom, i; @@ -707,7 +707,7 @@ int read_pdball(const char* inf, rename_pdbres(atoms, "SOL", watres, false, symtab); rename_pdbres(atoms, "WAT", watres, false, symtab); - rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose); + rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, residueTypeMap, true, bVerbose); if (natom == 0) { @@ -984,7 +984,7 @@ int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef x, bool bVerb return pdba->nr; } -void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueTypeMap* rt) +void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, const ResidueTypeMap& residueTypeMap) { std::string startResidueString = gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr); @@ -1026,11 +1026,11 @@ void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueTypeMap* rt) { bool allResiduesHaveSameType = true; std::string restype; - std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name); + std::string restype0 = typeOfNamedDatabaseResidue(residueTypeMap, *pdba->resinfo[r0].name); for (int i = r0 + 1; i < r1; i++) { - restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name); + restype = typeOfNamedDatabaseResidue(residueTypeMap, *pdba->resinfo[i].name); if (!gmx::equalCaseInsensitive(restype, restype0)) { allResiduesHaveSameType = false; @@ -1058,10 +1058,15 @@ void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueTypeMap* rt) } } -void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueTypeMap* rt, const gmx::MDLogger& logger) +void find_nc_ter(t_atoms* pdba, + int r0, + int r1, + int* r_start, + int* r_end, + const ResidueTypeMap& residueTypeMap, + const gmx::MDLogger& logger) { - int i; - std::optional startrestype; + std::optional residueTypeOfStartingTerminus; *r_start = -1; *r_end = -1; @@ -1072,7 +1077,7 @@ void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, Residu // Check that all residues have the same chain identifier, and if it is // non-blank we also require the residue types to match. - checkResidueTypeSanity(pdba, r0, r1, rt); + checkResidueTypeSanity(pdba, r0, r1, residueTypeMap); // If we return correctly from checkResidueTypeSanity(), the only // remaining cases where we can have non-matching residue types is if @@ -1085,16 +1090,17 @@ void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, Residu char chainID = pdba->resinfo[r0].chainid; /* Find the starting terminus (typially N or 5') */ - for (i = r0; i < r1 && *r_start == -1; i++) + for (int i = r0; i < r1 && *r_start == -1; i++) { - startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name); - if (!startrestype) + auto foundIt = residueTypeMap.find(*pdba->resinfo[i].name); + if (foundIt == residueTypeMap.end()) { continue; } - if (gmx::equalCaseInsensitive(*startrestype, "Protein") - || gmx::equalCaseInsensitive(*startrestype, "DNA") - || gmx::equalCaseInsensitive(*startrestype, "RNA")) + residueTypeOfStartingTerminus = foundIt->second; + if (gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "Protein") + || gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "DNA") + || gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "RNA")) { GMX_LOG(logger.info) .asParagraph() @@ -1103,7 +1109,7 @@ void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, Residu pdba->resinfo[i].nr); *r_start = i; } - else if (gmx::equalCaseInsensitive(*startrestype, "Ion")) + else if (gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "Ion")) { if (ionNotes < 5) { @@ -1182,17 +1188,18 @@ void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, Residu /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */ for (int i = *r_start; i < r1; i++) { - std::optional restype = - rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name); - if (!restype) + auto foundIt = residueTypeMap.find(*pdba->resinfo[i].name); + if (foundIt == residueTypeMap.end()) { continue; } - if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0) + const std::string& residueTypeOfCurrentResidue = foundIt->second; + if (gmx::equalCaseInsensitive(residueTypeOfCurrentResidue, residueTypeOfStartingTerminus.value()) + && endWarnings == 0) { *r_end = i; } - else if (gmx::equalCaseInsensitive(*startrestype, "Ion")) + else if (gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "Ion")) { if (ionNotes < 5) { @@ -1237,10 +1244,10 @@ void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, Residu "if this was not correct.", *pdba->resinfo[i].name, pdba->resinfo[i].nr, - restype->c_str(), + residueTypeOfCurrentResidue.c_str(), *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, - startrestype->c_str(), + residueTypeOfStartingTerminus.value().c_str(), *pdba->resinfo[i].name); } if (endWarnings == 4) @@ -2014,7 +2021,7 @@ int pdb2gmx::run() open_symtab(&symtab); /* Residue type database */ - ResidueTypeMap rt; + ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat"); /* Read residue renaming database(s), if present */ std::vector rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE); @@ -2033,12 +2040,14 @@ int pdb2gmx::run() for (const auto& rename : rtprename) { /* Only add names if the 'standard' gromacs/iupac base name was found */ - if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx)) + if (auto foundIt = residueTypeMap.find(rename.gmx); foundIt != residueTypeMap.end()) { - rt.addResidue(rename.main, *restype); - rt.addResidue(rename.nter, *restype); - rt.addResidue(rename.cter, *restype); - rt.addResidue(rename.bter, *restype); + // Add the renamed forms with the same residue type as the standard form + auto& residueType = foundIt->second; + addResidue(&residueTypeMap, rename.main, residueType); + addResidue(&residueTypeMap, rename.nter, residueType); + addResidue(&residueTypeMap, rename.cter, residueType); + addResidue(&residueTypeMap, rename.bter, residueType); } } @@ -2073,7 +2082,7 @@ int pdb2gmx::run() box, bRemoveH_, &symtab, - &rt, + residueTypeMap, watres, &aps, bVerbose_); @@ -2426,8 +2435,13 @@ int pdb2gmx::run() j = 0; for (int i = 0; i < cc->nterpairs; i++) { - find_nc_ter( - pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]), &(cc->r_end[j]), &rt, logger); + find_nc_ter(pdba, + cc->chainstart[i], + cc->chainstart[i + 1], + &(cc->r_start[j]), + &(cc->r_end[j]), + residueTypeMap, + logger); if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0) { if (checkChainCyclicity( @@ -2575,7 +2589,7 @@ int pdb2gmx::run() requires some re-thinking of code in gen_vsite.c, which I won't do now :( AF 26-7-99 */ - rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_); + rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, residueTypeMap, false, bVerbose_); match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger); @@ -2641,7 +2655,7 @@ int pdb2gmx::run() int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0; - std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name); + std::string restype = typeOfNamedDatabaseResidue(residueTypeMap, *pdba->resinfo[k].name); std::string molname; std::string suffix; diff --git a/src/gromacs/gmxpreprocess/pdb2top.cpp b/src/gromacs/gmxpreprocess/pdb2top.cpp index 0d70ee8961..067df13dbf 100644 --- a/src/gromacs/gmxpreprocess/pdb2top.cpp +++ b/src/gromacs/gmxpreprocess/pdb2top.cpp @@ -462,7 +462,7 @@ void choose_watermodel(const char* wmsel, const char* ffdir, char** watermodel, static int name2type(t_atoms* at, int** cgnr, gmx::ArrayRef usedPpResidues, - ResidueTypeMap* rt, + const ResidueTypeMap& residueTypeMap, const gmx::MDLogger& logger) { int i, j, prevresind, i0, prevcg, cg, curcg; @@ -487,7 +487,7 @@ static int name2type(t_atoms* at, if (at->atom[i].resind != resind) { resind = at->atom[i].resind; - bool bProt = rt->namedResidueHasType(*(at->resinfo[resind].name), "Protein"); + bool bProt = namedResidueHasType(residueTypeMap, *(at->resinfo[resind].name), "Protein"); bNterm = bProt && (resind == 0); if (resind > 0) { @@ -1563,7 +1563,7 @@ void pdb2top(FILE* top_file, int i, nmissat; gmx::EnumerationArray bts; - ResidueTypeMap rt; + ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat"); /* Make bonds */ at2bonds(&(plist[F_BONDS]), globalPatches, atoms, *x, long_bond_dist, short_bond_dist, cyclicBondsIndex, logger); @@ -1571,7 +1571,7 @@ void pdb2top(FILE* top_file, /* specbonds: disulphide bonds & heme-his */ do_ssbonds(&(plist[F_BONDS]), atoms, ssbonds, bAllowMissing); - nmissat = name2type(atoms, &cgnr, usedPpResidues, &rt, logger); + nmissat = name2type(atoms, &cgnr, usedPpResidues, residueTypeMap, logger); if (nmissat) { if (bAllowMissing) diff --git a/src/gromacs/gmxpreprocess/xlate.cpp b/src/gromacs/gmxpreprocess/xlate.cpp index b8b837922c..d5555bbb24 100644 --- a/src/gromacs/gmxpreprocess/xlate.cpp +++ b/src/gromacs/gmxpreprocess/xlate.cpp @@ -148,7 +148,7 @@ void rename_atoms(const char* xlfile, t_symtab* symtab, gmx::ArrayRef localPpResidue, bool bResname, - ResidueTypeMap* rt, + const ResidueTypeMap& rt, bool bReorderNum, bool bVerbose) { @@ -218,15 +218,15 @@ void rename_atoms(const char* xlfile, /* Match the residue name */ bMatch = (xlatom[i].res == nullptr || (gmx_strcasecmp("protein-nterm", xlatom[i].res) == 0 - && rt->namedResidueHasType(rnm, "Protein") && bStartTerm) + && namedResidueHasType(rt, rnm, "Protein") && bStartTerm) || (gmx_strcasecmp("protein-cterm", xlatom[i].res) == 0 - && rt->namedResidueHasType(rnm, "Protein") && bEndTerm) + && namedResidueHasType(rt, rnm, "Protein") && bEndTerm) || (gmx_strcasecmp("protein", xlatom[i].res) == 0 - && rt->namedResidueHasType(rnm, "Protein")) + && namedResidueHasType(rt, rnm, "Protein")) || (gmx_strcasecmp("DNA", xlatom[i].res) == 0 - && rt->namedResidueHasType(rnm, "DNA")) + && namedResidueHasType(rt, rnm, "DNA")) || (gmx_strcasecmp("RNA", xlatom[i].res) == 0 - && rt->namedResidueHasType(rnm, "RNA"))); + && namedResidueHasType(rt, rnm, "RNA"))); if (!bMatch) { const char* ptr0 = rnm; diff --git a/src/gromacs/gmxpreprocess/xlate.h b/src/gromacs/gmxpreprocess/xlate.h index b6246fcbcd..1a34376757 100644 --- a/src/gromacs/gmxpreprocess/xlate.h +++ b/src/gromacs/gmxpreprocess/xlate.h @@ -38,7 +38,8 @@ #ifndef GMX_GMXPREPROCESS_XLATE_H #define GMX_GMXPREPROCESS_XLATE_H -class ResidueTypeMap; +#include "gromacs/topology/residuetypes.h" + struct t_atoms; struct PreprocessResidue; struct t_symtab; @@ -58,7 +59,7 @@ void rename_atoms(const char* xlfile, t_symtab* symtab, gmx::ArrayRef restp, bool bResname, - ResidueTypeMap* rt, + const ResidueTypeMap& rt, bool bReorderNum, bool bVerbose); diff --git a/src/gromacs/topology/atomprop.cpp b/src/gromacs/topology/atomprop.cpp index 1b6523f09e..0846300453 100644 --- a/src/gromacs/topology/atomprop.cpp +++ b/src/gromacs/topology/atomprop.cpp @@ -109,7 +109,7 @@ public: //! The different atom properties. AtomProperty prop[epropNR]; //! The residue types. - ResidueTypeMap residueTypeMap; + ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat"); }; /*! \brief @@ -157,15 +157,15 @@ static int compareToDatabase(const std::string& search, const std::string& datab * \param[in] bExact Do we have the correct match. * \returns The index for the property. */ -static int findPropertyIndex(AtomProperty* ap, - ResidueTypeMap* residueTypeMap, - const std::string& residueName, - const std::string& atomName, - gmx_bool* bExact) +static int findPropertyIndex(AtomProperty* ap, + const ResidueTypeMap& residueTypeMap, + const std::string& residueName, + const std::string& atomName, + gmx_bool* bExact) { int j = NOTFOUND; - bool bProtein = residueTypeMap->namedResidueHasType(residueName, "Protein"); + bool bProtein = namedResidueHasType(residueTypeMap, residueName, "Protein"); bool bProtWild = residueName == "AAA"; int malen = NOTFOUND; int mrlen = NOTFOUND; @@ -227,12 +227,12 @@ static int findPropertyIndex(AtomProperty* ap, * \param[in] propValue Value of property. * \param[in] line Where to add property. */ -static void addProperty(AtomProperty* ap, - ResidueTypeMap* residueTypeMap, - const std::string& residueName, - const std::string& atomName, - real propValue, - int line) +static void addProperty(AtomProperty* ap, + const ResidueTypeMap& residueTypeMap, + const std::string& residueName, + const std::string& atomName, + real propValue, + int line) { bool bExact = false; int j = findPropertyIndex(ap, residueTypeMap, residueName, atomName, &bExact); @@ -284,7 +284,7 @@ static void addProperty(AtomProperty* ap, * \param[in] residueTypeMap Library of residue types. * \param[in] factor Scaling factor for property. */ -static void readProperty(AtomProperty* ap, ResidueTypeMap* residueTypeMap, double factor) +static void readProperty(AtomProperty* ap, const ResidueTypeMap& residueTypeMap, double factor) { char line[STRLEN], resnm[32], atomnm[32]; @@ -316,7 +316,7 @@ static void readProperty(AtomProperty* ap, ResidueTypeMap* residueTypeMap, doubl * \param[in] haveBeenWarned If we already set a warning before * \returns True of warning should be printed. */ -static bool setProperties(AtomProperty* ap, ResidueTypeMap* residueTypeMap, int eprop, bool haveBeenWarned) +static bool setProperties(AtomProperty* ap, const ResidueTypeMap& residueTypeMap, int eprop, bool haveBeenWarned) { const char* fns[epropNR] = { "atommass.dat", "vdwradii.dat", "dgsolv.dat", "electroneg.dat", "elements.dat" @@ -353,11 +353,6 @@ AtomProperty* AtomProperties::prop(int eprop) return &impl_->prop[eprop]; } -ResidueTypeMap* AtomProperties::residueTypeMap() -{ - return &impl_->residueTypeMap; -} - //! Print warning that vdW radii and masses are guessed. static void printWarning() { @@ -391,7 +386,7 @@ bool AtomProperties::setAtomProperty(int eprop, std::string tmpAtomName, tmpResidueName; bool bExact = false; - if (setProperties(prop(eprop), residueTypeMap(), eprop, impl_->bWarned)) + if (setProperties(prop(eprop), impl_->residueTypeMap, eprop, impl_->bWarned)) { printWarning(); impl_->bWarned = true; @@ -407,7 +402,7 @@ bool AtomProperties::setAtomProperty(int eprop, tmpAtomName = atomName; } const int j = findPropertyIndex( - &(impl_->prop[eprop]), &impl_->residueTypeMap, residueName, tmpAtomName, &bExact); + &(impl_->prop[eprop]), impl_->residueTypeMap, residueName, tmpAtomName, &bExact); if (eprop == epropVDW && !impl_->bWarnVDW) { @@ -429,7 +424,7 @@ bool AtomProperties::setAtomProperty(int eprop, std::string AtomProperties::elementFromAtomNumber(int atomNumber) { - if (setProperties(prop(epropElement), residueTypeMap(), epropElement, impl_->bWarned)) + if (setProperties(prop(epropElement), impl_->residueTypeMap, epropElement, impl_->bWarned)) { printWarning(); impl_->bWarned = true; @@ -446,7 +441,7 @@ std::string AtomProperties::elementFromAtomNumber(int atomNumber) int AtomProperties::atomNumberFromElement(const char* element) { - if (setProperties(prop(epropElement), residueTypeMap(), epropElement, impl_->bWarned)) + if (setProperties(prop(epropElement), impl_->residueTypeMap, epropElement, impl_->bWarned)) { printWarning(); impl_->bWarned = true; diff --git a/src/gromacs/topology/atomprop.h b/src/gromacs/topology/atomprop.h index f41f2444c2..721f0e9fcc 100644 --- a/src/gromacs/topology/atomprop.h +++ b/src/gromacs/topology/atomprop.h @@ -54,7 +54,6 @@ enum }; struct AtomProperty; -class ResidueTypeMap; /*! \brief * Holds all the atom property information loaded. */ @@ -110,9 +109,6 @@ public: AtomProperty* prop(int eprop); private: - //! Get handle to residuetype library. - ResidueTypeMap* residueTypeMap(); - //! Implementation pointer. class Impl; diff --git a/src/gromacs/topology/index.cpp b/src/gromacs/topology/index.cpp index fb738e43c5..c23cfb380c 100644 --- a/src/gromacs/topology/index.cpp +++ b/src/gromacs/topology/index.cpp @@ -562,20 +562,20 @@ void analyse(const t_atoms* atoms, t_blocka* gb, char*** gn, gmx_bool bASK, gmx_ add_grp(gb, gn, aid, "System"); /* For every residue, get a pointer to the residue type name */ - ResidueTypeMap rt; + ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat"); std::vector restype; std::vector previousTypename; if (atoms->nres > 0) { const char* resnm = *atoms->resinfo[0].name; - restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm)); + restype.emplace_back(typeOfNamedDatabaseResidue(residueTypeMap, resnm)); previousTypename.push_back(restype.back()); for (int i = 1; i < atoms->nres; i++) { const char* resnm = *atoms->resinfo[i].name; - restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm)); + restype.emplace_back(typeOfNamedDatabaseResidue(residueTypeMap, resnm)); /* Note that this does not lead to a N*N loop, but N*K, where * K is the number of residue _types_, which is small and independent of N. diff --git a/src/gromacs/topology/residuetypes.cpp b/src/gromacs/topology/residuetypes.cpp index 390f1dcef1..bfefdc0ac5 100644 --- a/src/gromacs/topology/residuetypes.cpp +++ b/src/gromacs/topology/residuetypes.cpp @@ -37,49 +37,41 @@ #include "residuetypes.h" -#include -#include - -#include -#include #include -#include -#include "gromacs/utility/arrayref.h" #include "gromacs/utility/cstringutil.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/futil.h" -#include "gromacs/utility/smalloc.h" #include "gromacs/utility/strdb.h" //! Definition for residue type that is not known. const ResidueType c_undefinedResidueType = "Other"; -//! Function object for comparisons used in std::unordered_map -class EqualCaseInsensitive +void addResidue(ResidueTypeMap* residueTypeMap, const ResidueName& residueName, const ResidueType& residueType) { -public: - bool operator()(const ResidueName& lhs, const ResidueName& rhs) const + if (auto [foundIt, insertionTookPlace] = residueTypeMap->insert({ residueName, residueType }); + !insertionTookPlace) { - return gmx::equalCaseInsensitive(lhs, rhs); + if (!gmx::equalCaseInsensitive(foundIt->second, residueType)) + { + fprintf(stderr, + "Warning: Residue '%s' already present with type '%s' in database, ignoring " + "new type '%s'.\n", + residueName.c_str(), + foundIt->second.c_str(), + residueType.c_str()); + } } -}; - -//! Implementation detail for ResidueTypeMap -class ResidueTypeMap::Impl -{ -public: - //! Storage object for entries. - std::unordered_map, EqualCaseInsensitive> entries; -}; +} -ResidueTypeMap::ResidueTypeMap() : impl_(new Impl) +ResidueTypeMap residueTypeMapFromLibraryFile(const std::string& residueTypesDatFilename) { char line[STRLEN]; char resname[STRLEN], restype[STRLEN], dum[STRLEN]; - gmx::FilePtr db = gmx::openLibraryFile("residuetypes.dat"); + gmx::FilePtr db = gmx::openLibraryFile(residueTypesDatFilename); + ResidueTypeMap residueTypeMap; while (get_a_line(db.get(), line, STRLEN)) { strip_comment(line); @@ -92,58 +84,28 @@ ResidueTypeMap::ResidueTypeMap() : impl_(new Impl) FARGS, "Incorrect number of columns (2 expected) for line in residuetypes.dat "); } - addResidue(resname, restype); - } - } -} - -ResidueTypeMap::~ResidueTypeMap() = default; - -bool ResidueTypeMap::nameIndexedInResidueTypeMap(const ResidueName& residueName) -{ - return impl_->entries.find(residueName) != impl_->entries.end(); -} - -void ResidueTypeMap::addResidue(const ResidueName& residueName, const ResidueType& residueType) -{ - if (auto [foundIt, insertionTookPlace] = impl_->entries.insert({ residueName, residueType }); - !insertionTookPlace) - { - if (!gmx::equalCaseInsensitive(foundIt->second, residueType)) - { - fprintf(stderr, - "Warning: Residue '%s' already present with type '%s' in database, ignoring " - "new type '%s'.\n", - residueName.c_str(), - foundIt->second.c_str(), - residueType.c_str()); + addResidue(&residueTypeMap, resname, restype); } } + return residueTypeMap; } -bool ResidueTypeMap::namedResidueHasType(const ResidueName& residueName, const ResidueType& residueType) +bool namedResidueHasType(const ResidueTypeMap& residueTypeMap, + const ResidueName& residueName, + const ResidueType& residueType) { - if (auto foundIt = impl_->entries.find(residueName); foundIt != impl_->entries.end()) + if (auto foundIt = residueTypeMap.find(residueName); foundIt != residueTypeMap.end()) { return gmx::equalCaseInsensitive(residueType, foundIt->second); } return false; } -ResidueType ResidueTypeMap::typeOfNamedDatabaseResidue(const ResidueName& residueName) +ResidueType typeOfNamedDatabaseResidue(const ResidueTypeMap& residueTypeMap, const ResidueName& residueName) { - if (auto foundIt = impl_->entries.find(residueName); foundIt != impl_->entries.end()) + if (auto foundIt = residueTypeMap.find(residueName); foundIt != residueTypeMap.end()) { return foundIt->second; } return c_undefinedResidueType; } - -std::optional ResidueTypeMap::optionalTypeOfNamedDatabaseResidue(const ResidueName& residueName) -{ - if (auto foundIt = impl_->entries.find(residueName); foundIt != impl_->entries.end()) - { - return std::make_optional(foundIt->second); - } - return std::nullopt; -} diff --git a/src/gromacs/topology/residuetypes.h b/src/gromacs/topology/residuetypes.h index 3e903b427c..d18e09953d 100644 --- a/src/gromacs/topology/residuetypes.h +++ b/src/gromacs/topology/residuetypes.h @@ -35,11 +35,11 @@ #ifndef GMX_TOPOLOGY_RESIDUETYPES_H #define GMX_TOPOLOGY_RESIDUETYPES_H -#include -#include #include +#include #include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/stringutil.h" /*! \brief Convenience type aliases * @@ -50,56 +50,52 @@ using ResidueName = std::string; using ResidueType = std::string; //! \} -class ResidueTypeMap -{ -public: - //! Default constructor. - ResidueTypeMap(); - //! Default destructor. - ~ResidueTypeMap(); +/*! \brief Maps residue names to residue types + * + * The contents are typically loaded from share/top/residuetypes.dat + * or similar file provided in the users's working directory. + */ +using ResidueTypeMap = + std::unordered_map, gmx::EqualCaseInsensitive>; + +/*! \brief + * Add entry to ResidueTypeMap if unique. + * + * \param[in] residueTypeMap Map to which to add new name+type entry + * \param[in] residueName Name of new residue. + * \param[in] residueType Type of new residue. + */ +void addResidue(ResidueTypeMap* residueTypeMap, const ResidueName& residueName, const ResidueType& residueType); - /*! \brief - * Return true if residue \p residueName is found or false otherwise. - * - * \param[in] residueName Residue name to search database for. - * \returns true if successful. - */ - bool nameIndexedInResidueTypeMap(const ResidueName& residueName); - /*! \brief - * Add entry to ResidueTypeMap if unique. - * - * \param[in] residueName Name of new residue. - * \param[in] residueType Type of new residue. - */ - void addResidue(const ResidueName& residueName, const ResidueType& residueType); - /*! \brief - * Checks if the indicated \p residueName if of \p residueType. - * - * \param[in] residueName Residue that should be checked. - * \param[in] residueType Which ResidueType the residue should have. - * \returns If the check was successful. - */ - bool namedResidueHasType(const ResidueName& residueName, const ResidueType& residueType); - /*! \brief - * Return the residue type if a residue with that name exists, or "Other" - * - * \param[in] residueName Name of the residue to search for. - * \returns The residue type of any matching residue, or "Other" - */ - ResidueType typeOfNamedDatabaseResidue(const ResidueName& residueName); - /*! \brief - * Return an optional residue type if a residue with that name exists - * - * \param[in] residueName Name of the residue to search for. - * \returns An optional containing the residue type of any matching residue - */ - std::optional optionalTypeOfNamedDatabaseResidue(const ResidueName& residueName); +/*! \brief Returns a ResidueTypeMap filled from a file + * + * The value of the parameter is typically "residuetypes.dat" which + * treats that as a GROMACS library file, ie. loads it from the working + * directory or from "share/top" corresponding to the sourced GMXRC. + * + * \param[in] residueTypesDatFilename Library file to read and from which to fill the returned map + */ +ResidueTypeMap residueTypeMapFromLibraryFile(const std::string& residueTypesDatFilename); -private: - //! Implementation pointer. - class Impl; +/*! \brief + * Checks if the indicated \p residueName is of \p residueType. + * + * \param[in] residueTypeMap Map to search + * \param[in] residueName Residue that should be checked. + * \param[in] residueType Which ResidueType the residue should have. + * \returns If the check was successful. + */ +bool namedResidueHasType(const ResidueTypeMap& residueTypeMap, + const ResidueName& residueName, + const ResidueType& residueType); - std::unique_ptr impl_; -}; +/*! \brief + * Return the residue type if a residue with that name exists, or "Other" + * + * \param[in] residueTypeMap Map to search + * \param[in] residueName Name of the residue to search for. + * \returns The residue type of any matching residue, or "Other" + */ +ResidueType typeOfNamedDatabaseResidue(const ResidueTypeMap& residueTypeMap, const ResidueName& residueName); #endif diff --git a/src/gromacs/utility/stringutil.h b/src/gromacs/utility/stringutil.h index 4249b004d7..e3a2680583 100644 --- a/src/gromacs/utility/stringutil.h +++ b/src/gromacs/utility/stringutil.h @@ -431,6 +431,16 @@ std::string replaceAllWords(const std::string& input, const std::string& from, c */ bool equalCaseInsensitive(const std::string& source, const std::string& target); +//! Function object for comparisons with \c equalCaseInsensitive +class EqualCaseInsensitive +{ +public: + bool operator()(const std::string& lhs, const std::string& rhs) const + { + return gmx::equalCaseInsensitive(lhs, rhs); + } +}; + /*! \brief * Checks if at most \p maxLengthOfComparison characters of two strings match case insensitive. * -- 2.22.0