2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "gromacs/utility/enumerationhelpers.h"
53 #include "gromacs/commandline/cmdlineoptionsmodule.h"
54 #include "gromacs/fileio/confio.h"
55 #include "gromacs/fileio/filetypes.h"
56 #include "gromacs/fileio/gmxfio.h"
57 #include "gromacs/fileio/pdbio.h"
58 #include "gromacs/gmxlib/conformation_utilities.h"
59 #include "gromacs/gmxpreprocess/fflibutil.h"
60 #include "gromacs/gmxpreprocess/genhydro.h"
61 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
62 #include "gromacs/gmxpreprocess/grompp_impl.h"
63 #include "gromacs/gmxpreprocess/h_db.h"
64 #include "gromacs/gmxpreprocess/hizzie.h"
65 #include "gromacs/gmxpreprocess/pdb2top.h"
66 #include "gromacs/gmxpreprocess/pgutil.h"
67 #include "gromacs/gmxpreprocess/specbond.h"
68 #include "gromacs/gmxpreprocess/ter_db.h"
69 #include "gromacs/gmxpreprocess/toputil.h"
70 #include "gromacs/gmxpreprocess/xlate.h"
71 #include "gromacs/math/vec.h"
72 #include "gromacs/options/basicoptions.h"
73 #include "gromacs/options/filenameoption.h"
74 #include "gromacs/options/ioptionscontainer.h"
75 #include "gromacs/topology/atomprop.h"
76 #include "gromacs/topology/block.h"
77 #include "gromacs/topology/index.h"
78 #include "gromacs/topology/residuetypes.h"
79 #include "gromacs/topology/symtab.h"
80 #include "gromacs/topology/topology.h"
81 #include "gromacs/utility/dir_separator.h"
82 #include "gromacs/utility/exceptions.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/filestream.h"
85 #include "gromacs/utility/loggerbuilder.h"
86 #include "gromacs/utility/path.h"
87 #include "gromacs/utility/smalloc.h"
88 #include "gromacs/utility/strdb.h"
89 #include "gromacs/utility/stringutil.h"
91 #include "hackblock.h"
96 RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
97 gmx(newGmx), main(newMain), nter(newNter), cter(newCter), bter(newBter)
110 const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
112 /* NOTE: This function returns the main building block name,
113 * it does not take terminal renaming into account.
115 auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
116 return gmx::equalCaseInsensitive(name, rename.gmx);
118 return found != rr.end() ? found->main.c_str() : name.c_str();
121 const char* enumValueToLongString(HistidineStates enumValue)
123 constexpr gmx::EnumerationArray<HistidineStates, const char*> histidineStatesLongNames = {
124 "H on ND1 only", "H on NE2 only", "H on ND1 and NE2", "Coupled to Heme"
126 return histidineStatesLongNames[enumValue];
129 enum class AspartateStates : int
136 const char* enumValueToString(AspartateStates enumValue)
138 constexpr gmx::EnumerationArray<AspartateStates, const char*> aspartateStateNames = { "ASP",
140 return aspartateStateNames[enumValue];
143 const char* enumValueToLongString(AspartateStates enumValue)
145 constexpr gmx::EnumerationArray<AspartateStates, const char*> aspartateStateLongNames = {
146 "Not protonated (charge -1)", "Protonated (charge 0)"
148 return aspartateStateLongNames[enumValue];
151 enum class GlutamateStates : int
158 const char* enumValueToString(GlutamateStates enumValue)
160 constexpr gmx::EnumerationArray<GlutamateStates, const char*> glutamateStateNames = { "GLU",
162 return glutamateStateNames[enumValue];
165 const char* enumValueToLongString(GlutamateStates enumValue)
167 constexpr gmx::EnumerationArray<GlutamateStates, const char*> glutamateStateLongNames = {
168 "Not protonated (charge -1)", "Protonated (charge 0)"
170 return glutamateStateLongNames[enumValue];
173 enum class GlutamineStates : int
180 const char* enumValueToString(GlutamineStates enumValue)
182 constexpr gmx::EnumerationArray<GlutamineStates, const char*> glutamineStateNames = { "GLN",
184 return glutamineStateNames[enumValue];
187 const char* enumValueToLongString(GlutamineStates enumValue)
189 constexpr gmx::EnumerationArray<GlutamineStates, const char*> glutamineStateLongNames = {
190 "Not protonated (charge 0)", "Protonated (charge +1)"
192 return glutamineStateLongNames[enumValue];
195 enum class LysineStates : int
202 const char* enumValueToString(LysineStates enumValue)
204 constexpr gmx::EnumerationArray<LysineStates, const char*> lysineStateNames = { "LYSN", "LYS" };
205 return lysineStateNames[enumValue];
208 const char* enumValueToLongString(LysineStates enumValue)
210 constexpr gmx::EnumerationArray<LysineStates, const char*> lysineStateLongNames = {
211 "Not protonated (charge 0)", "Protonated (charge +1)"
213 return lysineStateLongNames[enumValue];
216 enum class ArginineStates : int
223 const char* enumValueToString(ArginineStates enumValue)
225 constexpr gmx::EnumerationArray<ArginineStates, const char*> arginineStatesNames = { "ARGN",
227 return arginineStatesNames[enumValue];
230 const char* enumValueToLongString(ArginineStates enumValue)
232 constexpr gmx::EnumerationArray<ArginineStates, const char*> arginineStatesLongNames = {
233 "Not protonated (charge 0)", "Protonated (charge +1)"
235 return arginineStatesLongNames[enumValue];
238 template<typename EnumType>
239 const char* select_res(int resnr, const char* title, gmx::ArrayRef<const RtpRename> rr)
241 printf("Which %s type do you want for residue %d\n", title, resnr + 1);
242 for (auto sel : gmx::EnumerationWrapper<EnumType>{})
244 printf("%d. %s (%s)\n",
245 static_cast<int>(sel),
246 enumValueToString(sel),
247 res2bb_notermini(enumValueToString(sel), rr));
249 printf("\nType a number:");
253 if (scanf("%d", &userSelection) != 1)
255 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
258 return enumValueToLongString(static_cast<EnumType>(userSelection));
261 const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
263 return select_res<AspartateStates>(resnr, "ASPARTIC ACID", rr);
266 const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
268 return select_res<GlutamateStates>(resnr, "GLUTAMIC ACID", rr);
271 const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
273 return select_res<GlutamineStates>(resnr, "GLUTAMINE", rr);
276 const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
278 return select_res<LysineStates>(resnr, "LYSINE", rr);
281 const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
283 return select_res<ArginineStates>(resnr, "ARGININE", rr);
286 const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
288 return select_res<HistidineStates>(resnr, "HISTIDINE", rr);
291 void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
293 char line[STRLEN], buf[STRLEN];
296 while (get_a_line(fp, line, STRLEN))
298 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
299 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
300 * Note that the buffer length has been increased to 7 to allow this,
301 * so we just need to make sure the strings have zero-length initially.
313 int nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
314 RtpRename newEntry(gmx, main, nter, cter, bter);
317 if (nc != 2 && nc != 5)
320 "Residue renaming database '%s' has %d columns instead of %d or %d",
331 "A line in residue renaming database '%s' has %d columns, while previous "
332 "lines have %d columns",
340 /* This file does not have special termini names, copy them from main */
341 newEntry.nter = newEntry.main;
342 newEntry.cter = newEntry.main;
343 newEntry.bter = newEntry.main;
345 rtprename->push_back(newEntry);
349 std::string search_resrename(gmx::ArrayRef<const RtpRename> rr, const char* name, bool bStart, bool bEnd, bool bCompareFFRTPname)
351 auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
352 return ((!bCompareFFRTPname && (name == rename.gmx))
353 || (bCompareFFRTPname && (name == rename.main)));
357 /* If found in the database, rename this residue's rtp building block,
358 * otherwise keep the old name.
360 if (found != rr.end())
364 newName = found->bter;
368 newName = found->nter;
372 newName = found->cter;
376 newName = found->main;
379 if (newName[0] == '-')
382 "In the chosen force field there is no residue type for '%s'%s",
384 bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
385 : (bEnd ? " as an ending terminus" : ""));
392 void rename_resrtp(t_atoms* pdba,
394 gmx::ArrayRef<const int> r_start,
395 gmx::ArrayRef<const int> r_end,
396 gmx::ArrayRef<const RtpRename> rr,
399 const gmx::MDLogger& logger)
401 bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
403 for (int r = 0; r < pdba->nres; r++)
407 for (int j = 0; j < nterpairs; j++)
414 for (int j = 0; j < nterpairs; j++)
422 std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
424 if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
426 /* This is a terminal residue, but the residue name,
427 * currently stored in .rtp, is not a standard residue name,
428 * but probably a force field specific rtp name.
429 * Check if we need to rename it because it is terminal.
431 newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
434 if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
440 .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
442 *pdba->resinfo[r].name,
445 pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
450 void pdbres_to_gmxrtp(t_atoms* pdba)
454 for (i = 0; (i < pdba->nres); i++)
456 if (pdba->resinfo[i].rtp == nullptr)
458 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
463 void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
468 for (i = 0; (i < pdba->nres); i++)
470 resnm = *pdba->resinfo[i].name;
471 if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
472 || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
474 /* Rename the residue name (not the rtp name) */
475 pdba->resinfo[i].name = put_symtab(symtab, newnm);
480 /*! \brief Rename all residues named \c oldnm to \c newnm
482 * Search for residues for which the residue name from the input
483 * configuration file matches \c oldnm, and when found choose the rtp
484 * entry and name of \c newnm.
486 * \todo Refactor this function to accept a lambda that accepts i and
487 * numMatchesFound but always produces \c newnm. Then remove
488 * renameResiduesInteractively by calling this method with suitable
489 * lambdas that capture its parameter \c rr and ignores
490 * numMatchesFound. */
491 void renameResidue(const gmx::MDLogger& logger,
498 int numMatchesFound = 0;
499 for (int i = 0; (i < pdba->nres); i++)
501 /* We have not set the rtp name yet, use the residue name */
502 const char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
503 if ((bFullCompare && (gmx::equalCaseInsensitive(residueNameInInputConfiguration, oldnm)))
504 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
506 /* Change the rtp building block name */
507 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
508 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
512 if (numMatchesFound > 0)
516 .appendTextFormatted(
517 "Replaced %d residue%s named %s to the default %s. Use interactive "
518 "selection of protonated residues if that is what you need.",
520 numMatchesFound > 1 ? "s" : "",
526 /*! \brief Rename all residues named \c oldnm according to the user's
529 * Search for residues for which the residue name from the input
530 * configuration file matches \c oldnm, and when found choose the rtp
531 * entry and name of the interactive choice from \c gettp.
533 * \todo Remove this function, per todo in \c renameResidue. */
534 void renameResidueInteractively(t_atoms* pdba,
536 const char* gettp(int, gmx::ArrayRef<const RtpRename>),
539 gmx::ArrayRef<const RtpRename> rr)
541 // Search for residues i for which the residue name from the input
542 // configuration file matches oldnm, so it can replaced by the rtp
543 // entry and name of newnm.
544 for (int i = 0; i < pdba->nres; i++)
546 /* We have not set the rtp name yet, use the residue name */
547 char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
548 if ((bFullCompare && (strcmp(residueNameInInputConfiguration, oldnm) == 0))
549 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
551 const char* interactiveRtpChoice = gettp(i, rr);
552 pdba->resinfo[i].rtp = put_symtab(symtab, interactiveRtpChoice);
553 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
558 void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
564 ftp = fn2ftp(filename);
565 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
567 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
571 for (i = 0; (i < atoms->nr); i++)
573 if (atoms->pdbinfo[i].occup != 1)
577 GMX_LOG(logger.warning)
579 .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
580 *atoms->resinfo[atoms->atom[i].resind].name,
581 atoms->resinfo[atoms->atom[i].resind].nr,
583 atoms->pdbinfo[i].occup);
585 if (atoms->pdbinfo[i].occup == 0)
595 if (nzero == atoms->nr)
597 GMX_LOG(logger.warning)
599 .appendTextFormatted(
600 "All occupancy fields zero. This is probably not an X-Ray structure");
602 else if ((nzero > 0) || (nnotone > 0))
604 GMX_LOG(logger.warning)
606 .appendTextFormatted(
607 "there were %d atoms with zero occupancy and %d atoms with "
608 " occupancy unequal to one (out of %d atoms). Check your pdb "
616 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
621 void write_posres(const char* fn, t_atoms* pdba, real fc)
626 fp = gmx_fio_fopen(fn, "w");
628 "; In this topology include file, you will find position restraint\n"
629 "; entries for all the heavy atoms in your original pdb file.\n"
630 "; This means that all the protons which were added by pdb2gmx are\n"
631 "; not restrained.\n"
633 "[ position_restraints ]\n"
634 "; %4s%6s%8s%8s%8s\n",
640 for (i = 0; (i < pdba->nr); i++)
642 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
644 fprintf(fp, "%6d%6d %g %g %g\n", i + 1, 1, fc, fc, fc);
650 int read_pdball(const char* inf,
660 const ResidueTypeMap& residueTypeMap,
664 /* Read a pdb file. (containing proteins) */
666 int natom, new_natom, i;
669 printf("Reading %s...\n", inf);
670 readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
672 if (atoms->pdbinfo == nullptr)
674 snew(atoms->pdbinfo, atoms->nr);
676 if (fn2ftp(inf) == efPDB)
678 get_pdb_atomnumber(atoms, aps);
683 for (i = 0; i < atoms->nr; i++)
685 if (!is_hydrogen(*atoms->atomname[i]))
687 atoms->atom[new_natom] = atoms->atom[i];
688 atoms->atomname[new_natom] = atoms->atomname[i];
689 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
690 copy_rvec((*x)[i], (*x)[new_natom]);
694 atoms->nr = new_natom;
701 printf(" '%s',", *title);
703 printf(" %d atoms\n", natom);
705 /* Rename residues */
706 rename_pdbres(atoms, "HOH", watres, false, symtab);
707 rename_pdbres(atoms, "SOL", watres, false, symtab);
708 rename_pdbres(atoms, "WAT", watres, false, symtab);
710 rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, residueTypeMap, true, bVerbose);
718 write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
724 void process_chain(const gmx::MDLogger& logger,
726 gmx::ArrayRef<gmx::RVec> x,
739 gmx::ArrayRef<const RtpRename> rr)
741 /* Rename aromatics, lys, asp and histidine */
744 renameResidue(logger, pdba, "TYR", "TYRU", false, symtab);
748 renameResidue(logger, pdba, "TRP", "TRPU", false, symtab);
752 renameResidue(logger, pdba, "PHE", "PHEU", false, symtab);
756 renameResidueInteractively(pdba, "LYS", get_lystp, false, symtab, rr);
760 renameResidueInteractively(pdba, "ARG", get_argtp, false, symtab, rr);
764 renameResidueInteractively(pdba, "GLN", get_glntp, false, symtab, rr);
768 renameResidueInteractively(pdba, "ASP", get_asptp, false, symtab, rr);
772 renameResidue(logger, pdba, "ASPH", "ASP", false, symtab);
776 renameResidueInteractively(pdba, "GLU", get_glutp, false, symtab, rr);
780 renameResidue(logger, pdba, "GLUH", "GLU", false, symtab);
785 set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
789 renameResidueInteractively(pdba, "HIS", get_histp, true, symtab, rr);
792 /* Initialize the rtp builing block names with the residue names
793 * for the residues that have not been processed above.
795 pdbres_to_gmxrtp(pdba);
797 /* Now we have all rtp names set.
798 * The rtp names will conform to Gromacs naming,
799 * unless the input pdb file contained one or more force field specific
800 * rtp names as residue names.
804 /* struct for sorting the atoms from the pdb file */
807 int resnr; /* residue number */
808 int j; /* database order index */
809 int index; /* original atom number */
810 char anm1; /* second letter of atom name */
811 char altloc; /* alternate location indicator */
814 bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
816 int d = (a.resnr - b.resnr);
822 d = (a.anm1 - b.anm1);
825 d = (a.altloc - b.altloc);
832 void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
835 t_atoms** newPdbAtoms,
836 std::vector<gmx::RVec>* x,
840 t_atoms* pdba = *pdbaptr;
841 std::vector<gmx::RVec> xnew;
848 for (int i = 0; i < natoms; i++)
850 atomnm = *pdba->atomname[i];
851 const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
853 std::find_if(localPpResidue->atomname.begin(),
854 localPpResidue->atomname.end(),
855 [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
856 if (found == localPpResidue->atomname.end())
858 std::string buf = gmx::formatString(
859 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
860 "while sorting atoms.\n%s",
862 *pdba->resinfo[pdba->atom[i].resind].name,
863 pdba->resinfo[pdba->atom[i].resind].nr,
864 localPpResidue->resname.c_str(),
865 localPpResidue->natom(),
867 ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
868 "might have had a different number in the PDB file and was rebuilt\n"
869 "(it might for instance have been H3, and we only expected H1 & "
871 "Note that hydrogens might have been added to the entry for the "
873 "Remove this hydrogen or choose a different protonation state to "
875 "Option -ignh will ignore all hydrogens in the input."
877 gmx_fatal(FARGS, "%s", buf.c_str());
879 /* make shadow array to be sorted into indexgroup */
880 pdbi[i].resnr = pdba->atom[i].resind;
881 pdbi[i].j = std::distance(localPpResidue->atomname.begin(), found);
883 pdbi[i].anm1 = atomnm[1];
884 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
886 std::sort(pdbi, pdbi + natoms, pdbicomp);
888 /* pdba is sorted in pdbnew using the pdbi index */
889 std::vector<int> a(natoms);
890 srenew(*newPdbAtoms, 1);
891 init_t_atoms((*newPdbAtoms), natoms, true);
892 (*newPdbAtoms)->nr = pdba->nr;
893 (*newPdbAtoms)->nres = pdba->nres;
894 srenew((*newPdbAtoms)->resinfo, pdba->nres);
895 std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
896 for (int i = 0; i < natoms; i++)
898 (*newPdbAtoms)->atom[i] = pdba->atom[pdbi[i].index];
899 (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
900 (*newPdbAtoms)->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
901 xnew.push_back(x->at(pdbi[i].index));
902 /* make indexgroup in block */
903 a[i] = pdbi[i].index;
908 /* copy the sorted pdbnew back to pdba */
909 *pdbaptr = *newPdbAtoms;
911 add_grp(block, gnames, a, "prot_sort");
915 int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
917 int i, j, oldnatoms, ndel;
920 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
921 oldnatoms = pdba->nr;
923 /* NOTE: pdba->nr is modified inside the loop */
924 for (i = 1; (i < pdba->nr); i++)
926 /* compare 'i' and 'i-1', throw away 'i' if they are identical
927 this is a 'while' because multiple alternate locations can be present */
928 while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
929 && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
934 ri = &pdba->resinfo[pdba->atom[i].resind];
937 .appendTextFormatted("deleting duplicate atom %4s %s%4d%c",
942 if (ri->chainid && (ri->chainid != ' '))
944 printf(" ch %c", ri->chainid);
948 if (pdba->pdbinfo[i].atomnr)
950 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
952 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
954 printf(" altloc %c", pdba->pdbinfo[i].altloc);
960 /* We can not free, since it might be in the symtab */
961 /* sfree(pdba->atomname[i]); */
962 for (j = i; j < pdba->nr; j++)
964 pdba->atom[j] = pdba->atom[j + 1];
965 pdba->atomname[j] = pdba->atomname[j + 1];
968 pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
970 copy_rvec(x[j + 1], x[j]);
972 srenew(pdba->atom, pdba->nr);
973 /* srenew(pdba->atomname, pdba->nr); */
974 srenew(pdba->pdbinfo, pdba->nr);
977 if (pdba->nr != oldnatoms)
981 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
987 void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, const ResidueTypeMap& residueTypeMap)
989 std::string startResidueString =
990 gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
991 std::string endResidueString =
992 gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
994 // Check whether all residues in chain have the same chain ID.
995 bool allResiduesHaveSameChainID = true;
996 char chainID0 = pdba->resinfo[r0].chainid;
998 std::string residueString;
1000 for (int i = r0 + 1; i < r1; i++)
1002 chainID = pdba->resinfo[i].chainid;
1003 if (chainID != chainID0)
1005 allResiduesHaveSameChainID = false;
1006 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1011 if (!allResiduesHaveSameChainID)
1014 "The chain covering the range %s--%s does not have a consistent chain ID. "
1015 "The first residue has ID '%c', while residue %s has ID '%c'.",
1016 startResidueString.c_str(),
1017 endResidueString.c_str(),
1019 residueString.c_str(),
1023 // At this point all residues have the same ID. If they are also non-blank
1024 // we can be a bit more aggressive and require the types match too.
1025 if (chainID0 != ' ')
1027 bool allResiduesHaveSameType = true;
1028 std::string restype;
1029 std::string restype0 = typeOfNamedDatabaseResidue(residueTypeMap, *pdba->resinfo[r0].name);
1031 for (int i = r0 + 1; i < r1; i++)
1033 restype = typeOfNamedDatabaseResidue(residueTypeMap, *pdba->resinfo[i].name);
1034 if (!gmx::equalCaseInsensitive(restype, restype0))
1036 allResiduesHaveSameType = false;
1037 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1042 if (!allResiduesHaveSameType)
1045 "The residues in the chain %s--%s do not have a consistent type. "
1046 "The first residue has type '%s', while residue %s is of type '%s'. "
1047 "Either there is a mistake in your chain, or it includes nonstandard "
1048 "residue names that have not yet been added to the residuetypes.dat "
1049 "file in the GROMACS library directory. If there are other molecules "
1050 "such as ligands, they should not have the same chain ID as the "
1051 "adjacent protein chain since it's a separate molecule.",
1052 startResidueString.c_str(),
1053 endResidueString.c_str(),
1055 residueString.c_str(),
1061 void find_nc_ter(t_atoms* pdba,
1066 const ResidueTypeMap& residueTypeMap,
1067 const gmx::MDLogger& logger)
1069 std::optional<std::string> residueTypeOfStartingTerminus;
1074 int startWarnings = 0;
1075 int endWarnings = 0;
1078 // Check that all residues have the same chain identifier, and if it is
1079 // non-blank we also require the residue types to match.
1080 checkResidueTypeSanity(pdba, r0, r1, residueTypeMap);
1082 // If we return correctly from checkResidueTypeSanity(), the only
1083 // remaining cases where we can have non-matching residue types is if
1084 // the chain ID was blank, which could be the case e.g. for a structure
1085 // read from a GRO file or other file types without chain information.
1086 // In that case we need to be a bit more liberal and detect chains based
1087 // on the residue type.
1089 // If we get here, the chain ID must be identical for all residues
1090 char chainID = pdba->resinfo[r0].chainid;
1092 /* Find the starting terminus (typially N or 5') */
1093 for (int i = r0; i < r1 && *r_start == -1; i++)
1095 auto foundIt = residueTypeMap.find(*pdba->resinfo[i].name);
1096 if (foundIt == residueTypeMap.end())
1100 residueTypeOfStartingTerminus = foundIt->second;
1101 if (gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "Protein")
1102 || gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "DNA")
1103 || gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "RNA"))
1105 GMX_LOG(logger.info)
1107 .appendTextFormatted("Identified residue %s%d as a starting terminus.",
1108 *pdba->resinfo[i].name,
1109 pdba->resinfo[i].nr);
1112 else if (gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "Ion"))
1116 GMX_LOG(logger.info)
1118 .appendTextFormatted(
1119 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1121 *pdba->resinfo[i].name,
1122 pdba->resinfo[i].nr);
1126 GMX_LOG(logger.info)
1128 .appendTextFormatted("Disabling further notes about ions.");
1134 // Either no known residue type, or one not needing special handling
1135 if (startWarnings < 5)
1139 GMX_LOG(logger.warning)
1141 .appendTextFormatted(
1142 "Starting residue %s%d in chain not identified as "
1144 "This chain lacks identifiers, which makes it impossible to do "
1146 "classification of the start/end residues. Here we need to "
1147 "guess this residue "
1148 "should not be part of the chain and instead introduce a "
1149 "break, but that will "
1150 "be catastrophic if they should in fact be linked. Please "
1151 "check your structure, "
1152 "and add %s to residuetypes.dat if this was not correct.",
1153 *pdba->resinfo[i].name,
1154 pdba->resinfo[i].nr,
1155 *pdba->resinfo[i].name);
1159 GMX_LOG(logger.warning)
1161 .appendTextFormatted(
1162 "No residues in chain starting at %s%d identified as "
1164 "This makes it impossible to link them into a molecule, which "
1166 "correct or a catastrophic error. Please check your structure, "
1168 "necessary residue names to residuetypes.dat if this was not "
1170 *pdba->resinfo[i].name,
1171 pdba->resinfo[i].nr);
1174 if (startWarnings == 4)
1176 GMX_LOG(logger.warning)
1178 .appendTextFormatted(
1179 "Disabling further warnings about unidentified residues at start "
1188 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1189 for (int i = *r_start; i < r1; i++)
1191 auto foundIt = residueTypeMap.find(*pdba->resinfo[i].name);
1192 if (foundIt == residueTypeMap.end())
1196 const std::string& residueTypeOfCurrentResidue = foundIt->second;
1197 if (gmx::equalCaseInsensitive(residueTypeOfCurrentResidue, residueTypeOfStartingTerminus.value())
1198 && endWarnings == 0)
1202 else if (gmx::equalCaseInsensitive(residueTypeOfStartingTerminus.value(), "Ion"))
1206 GMX_LOG(logger.info)
1208 .appendTextFormatted(
1209 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1211 *pdba->resinfo[i].name,
1212 pdba->resinfo[i].nr);
1216 GMX_LOG(logger.info)
1218 .appendTextFormatted("Disabling further notes about ions.");
1224 // Either no known residue type, or one not needing special handling.
1225 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1226 // Otherwise the call to checkResidueTypeSanity() will
1227 // have caught the problem.
1228 if (endWarnings < 5)
1230 GMX_LOG(logger.warning)
1232 .appendTextFormatted(
1233 "Residue %s%d in chain has different type ('%s') from "
1234 "residue %s%d ('%s'). This chain lacks identifiers, which "
1236 "it impossible to do strict classification of the start/end "
1237 "residues. Here we "
1238 "need to guess this residue should not be part of the chain "
1240 "introduce a break, but that will be catastrophic if they "
1241 "should in fact be "
1242 "linked. Please check your structure, and add %s to "
1244 "if this was not correct.",
1245 *pdba->resinfo[i].name,
1246 pdba->resinfo[i].nr,
1247 residueTypeOfCurrentResidue.c_str(),
1248 *pdba->resinfo[*r_start].name,
1249 pdba->resinfo[*r_start].nr,
1250 residueTypeOfStartingTerminus.value().c_str(),
1251 *pdba->resinfo[i].name);
1253 if (endWarnings == 4)
1255 GMX_LOG(logger.warning)
1257 .appendTextFormatted(
1258 "Disabling further warnings about unidentified residues at end "
1268 GMX_LOG(logger.info)
1270 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1271 *pdba->resinfo[*r_end].name,
1272 pdba->resinfo[*r_end].nr);
1276 enum class ChainSeparationType : int
1285 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1286 { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1288 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1289 { "Splitting chemical chains based on TER records or chain id changing.\n",
1290 "Splitting chemical chains based on TER records and chain id changing.\n",
1291 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1292 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1293 "Splitting chemical chains interactively.\n" }
1296 void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1299 char old_prev_chainid;
1300 char old_this_chainid;
1301 int old_prev_chainnum;
1302 int old_this_chainnum;
1304 char select[STRLEN];
1308 const char* prev_atomname;
1309 const char* this_atomname;
1310 const char* prev_resname;
1311 const char* this_resname;
1317 /* The default chain enumeration is based on TER records only */
1318 printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1320 old_prev_chainid = '?';
1321 old_prev_chainnum = -1;
1324 this_atomname = nullptr;
1326 this_resname = nullptr;
1330 for (i = 0; i < pdba->nres; i++)
1332 ri = &pdba->resinfo[i];
1333 old_this_chainid = ri->chainid;
1334 old_this_chainnum = ri->chainnum;
1336 prev_atomname = this_atomname;
1337 prev_atomnum = this_atomnum;
1338 prev_resname = this_resname;
1339 prev_resnum = this_resnum;
1340 prev_chainid = this_chainid;
1342 this_atomname = *(pdba->atomname[i]);
1343 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1344 this_resname = *ri->name;
1345 this_resnum = ri->nr;
1346 this_chainid = ri->chainid;
1348 switch (chainSeparation)
1350 case ChainSeparationType::IdOrTer:
1351 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1357 case ChainSeparationType::IdAndTer:
1358 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1364 case ChainSeparationType::Id:
1365 if (old_this_chainid != old_prev_chainid)
1371 case ChainSeparationType::Ter:
1372 if (old_this_chainnum != old_prev_chainnum)
1377 case ChainSeparationType::Interactive:
1378 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1382 GMX_LOG(logger.info)
1384 .appendTextFormatted(
1385 "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1387 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1399 if (nullptr == fgets(select, STRLEN - 1, stdin))
1401 gmx_fatal(FARGS, "Error reading from stdin");
1404 if (i == 0 || select[0] == 'y')
1410 case ChainSeparationType::Count:
1411 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1413 old_prev_chainid = old_this_chainid;
1414 old_prev_chainnum = old_this_chainnum;
1416 ri->chainnum = new_chainnum;
1420 bool checkChainCyclicity(t_atoms* pdba,
1424 gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1425 gmx::ArrayRef<const RtpRename> rr,
1426 real long_bond_dist_,
1427 real short_bond_dist_)
1429 // if start and end are the same, we can't have a cycle
1430 if (start_ter == end_ter)
1434 int ai = -1, aj = -1;
1435 char* rtpname = *(pdba->resinfo[start_ter].rtp);
1436 std::string newName = search_resrename(rr, rtpname, false, false, false);
1437 if (newName.empty())
1441 auto res = getDatabaseEntry(newName, rtpFFDB);
1442 const char *name_ai, *name_aj;
1444 for (const auto& patch : res->rb[BondedTypes::Bonds].b)
1445 { /* Search backward bond for n/5' terminus */
1446 name_ai = patch.ai().c_str();
1447 name_aj = patch.aj().c_str();
1448 if (name_ai[0] == '-')
1450 aj = search_res_atom(++name_ai, end_ter, pdba, "check", TRUE);
1451 ai = search_res_atom(name_aj, start_ter, pdba, "check", TRUE);
1453 else if (name_aj[0] == '-')
1455 aj = search_res_atom(++name_aj, end_ter, pdba, "check", TRUE);
1456 ai = search_res_atom(name_ai, start_ter, pdba, "check", TRUE);
1458 if (ai >= 0 && aj >= 0)
1464 if (!(ai >= 0 && aj >= 0))
1466 rtpname = *(pdba->resinfo[end_ter].rtp);
1467 newName = search_resrename(rr, rtpname, false, false, false);
1468 if (newName.empty())
1472 res = getDatabaseEntry(newName, rtpFFDB);
1473 for (const auto& patch : res->rb[BondedTypes::Bonds].b)
1475 /* Seach forward bond for c/3' terminus */
1476 name_ai = patch.ai().c_str();
1477 name_aj = patch.aj().c_str();
1479 if (name_ai[0] == '+')
1481 ai = search_res_atom(name_aj, end_ter, pdba, "check", TRUE);
1482 aj = search_res_atom(++name_ai, start_ter, pdba, "check", TRUE);
1484 else if (name_aj[0] == '+')
1486 ai = search_res_atom(name_ai, end_ter, pdba, "check", TRUE);
1487 aj = search_res_atom(++name_aj, start_ter, pdba, "check", TRUE);
1489 if (ai >= 0 && aj >= 0)
1496 if (ai >= 0 && aj >= 0)
1498 real dist = distance2(pdbx[ai], pdbx[aj]);
1499 /* it is better to read bond length from ffbonded.itp */
1500 return (dist < gmx::square(long_bond_dist_) && dist > gmx::square(short_bond_dist_));
1511 int chainnum = ' '; // char, but stored as int to make clang-tidy happy
1514 bool bAllWat = false;
1516 std::vector<int> chainstart;
1523 bool bAllWat = false;
1525 std::vector<int> chainstart;
1526 std::vector<MoleculePatchDatabase*> ntdb;
1527 std::vector<MoleculePatchDatabase*> ctdb;
1528 std::vector<int> r_start;
1529 std::vector<int> r_end;
1531 std::vector<gmx::RVec> x;
1532 std::vector<int> cyclicBondsIndex;
1535 enum class VSitesType : int
1542 const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = {
1543 { "none", "hydrogens", "aromatics" }
1546 enum class WaterType : int
1558 const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1559 { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1562 enum class MergeType : int
1569 const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = {
1570 { "no", "all", "interactive" }
1581 class pdb2gmx : public ICommandLineOptionsModule
1587 bVsiteAromatics_(FALSE),
1588 chainSeparation_(ChainSeparationType::IdOrTer),
1589 vsitesType_(VSitesType::None),
1590 waterType_(WaterType::Select),
1591 mergeType_(MergeType::No),
1595 gmx::LoggerBuilder builder;
1596 builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1597 builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1598 loggerOwner_ = std::make_unique<LoggerOwner>(builder.build());
1601 // From ICommandLineOptionsModule
1602 void init(CommandLineModuleSettings* /*settings*/) override {}
1604 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1606 void optionsFinished() override;
1624 bool bAllowMissing_;
1628 bool bChargeGroups_;
1638 bool bVsiteAromatics_;
1642 real long_bond_dist_;
1643 real short_bond_dist_;
1645 std::string indexOutputFile_;
1646 std::string outputFile_;
1647 std::string topologyFile_;
1648 std::string includeTopologyFile_;
1649 std::string outputConfFile_;
1650 std::string inputConfFile_;
1651 std::string outFile_;
1654 ChainSeparationType chainSeparation_;
1655 VSitesType vsitesType_;
1656 WaterType waterType_;
1657 MergeType mergeType_;
1660 char forcefield_[STRLEN];
1661 char ffdir_[STRLEN];
1664 std::vector<std::string> incls_;
1665 std::vector<t_mols> mols_;
1667 std::unique_ptr<LoggerOwner> loggerOwner_;
1670 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1672 const char* desc[] = {
1673 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1674 "some database files, adds hydrogens to the molecules and generates",
1675 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1676 "and a topology in GROMACS format.",
1677 "These files can subsequently be processed to generate a run input file.",
1679 "[THISMODULE] will search for force fields by looking for",
1680 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1681 "of the current working directory and of the GROMACS library directory",
1682 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1684 "By default the forcefield selection is interactive,",
1685 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1686 "in the list on the command line instead. In that case [THISMODULE] just looks",
1687 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1689 "After choosing a force field, all files will be read only from",
1690 "the corresponding force field directory.",
1691 "If you want to modify or add a residue types, you can copy the force",
1692 "field directory from the GROMACS library directory to your current",
1693 "working directory. If you want to add new protein residue types,",
1694 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1695 "or copy the whole library directory to a local directory and set",
1696 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1697 "Check Chapter 5 of the manual for more information about file formats.",
1700 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1701 "need not necessarily contain a protein structure. Every kind of",
1702 "molecule for which there is support in the database can be converted.",
1703 "If there is no support in the database, you can add it yourself.[PAR]",
1705 "The program has limited intelligence, it reads a number of database",
1706 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1707 "if necessary this can be done manually. The program can prompt the",
1708 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1709 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1710 "protonated (three protons, default), for Asp and Glu unprotonated",
1711 "(default) or protonated, for His the proton can be either on ND1,",
1712 "on NE2 or on both. By default these selections are done automatically.",
1713 "For His, this is based on an optimal hydrogen bonding",
1714 "conformation. Hydrogen bonds are defined based on a simple geometric",
1715 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1716 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1717 "[TT]-dist[tt] respectively.[PAR]",
1719 "The protonation state of N- and C-termini can be chosen interactively",
1720 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1721 "respectively. Some force fields support zwitterionic forms for chains of",
1722 "one residue, but for polypeptides these options should NOT be selected.",
1723 "The AMBER force fields have unique forms for the terminal residues,",
1724 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1725 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1726 "respectively to use these forms, making sure you preserve the format",
1727 "of the coordinate file. Alternatively, use named terminating residues",
1728 "(e.g. ACE, NME).[PAR]",
1730 "The separation of chains is not entirely trivial since the markup",
1731 "in user-generated PDB files frequently varies and sometimes it",
1732 "is desirable to merge entries across a TER record, for instance",
1733 "if you want a disulfide bridge or distance restraints between",
1734 "two protein chains or if you have a HEME group bound to a protein.",
1735 "In such cases multiple chains should be contained in a single",
1736 "[TT]moleculetype[tt] definition.",
1737 "To handle this, [THISMODULE] uses two separate options.",
1738 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1739 "start, and termini added when applicable. This can be done based on the",
1740 "existence of TER records, when the chain id changes, or combinations of either",
1741 "or both of these. You can also do the selection fully interactively.",
1742 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1743 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1744 "This can be turned off (no merging), all non-water chains can be merged into a",
1745 "single molecule, or the selection can be done interactively.[PAR]",
1747 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1748 "If any of the occupancies are not one, indicating that the atom is",
1749 "not resolved well in the structure, a warning message is issued.",
1750 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1751 "all occupancy fields may be zero. Either way, it is up to the user",
1752 "to verify the correctness of the input data (read the article!).[PAR]",
1754 "During processing the atoms will be reordered according to GROMACS",
1755 "conventions. With [TT]-n[tt] an index file can be generated that",
1756 "contains one group reordered in the same way. This allows you to",
1757 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1758 "one limitation: reordering is done after the hydrogens are stripped",
1759 "from the input and before new hydrogens are added. This means that",
1760 "you should not use [TT]-ignh[tt].[PAR]",
1762 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1763 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1764 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1767 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1768 "motions. Angular and out-of-plane motions can be removed by changing",
1769 "hydrogens into virtual sites and fixing angles, which fixes their",
1770 "position relative to neighboring atoms. Additionally, all atoms in the",
1771 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1772 "can be converted into virtual sites, eliminating the fast improper dihedral",
1773 "fluctuations in these rings (but this feature is deprecated).",
1774 "[BB]Note[bb] that in this case all other hydrogen",
1775 "atoms are also converted to virtual sites. The mass of all atoms that are",
1776 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1777 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1778 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1779 "done for water hydrogens to slow down the rotational motion of water.",
1780 "The increase in mass of the hydrogens is subtracted from the bonded",
1781 "(heavy) atom so that the total mass of the system remains the same.",
1783 "As a special case, ring-closed (or cyclic) molecules are considered.",
1784 "[THISMODULE] automatically determines if a cyclic molecule is present",
1785 "by evaluating the distance between the terminal atoms of a given chain.",
1786 "If this distance is greater than the [TT]-sb[tt]",
1787 "(\"Short bond warning distance\", default 0.05 nm)",
1788 "and less than the [TT]-lb[tt] (\"Long bond warning distance\", default 0.25 nm)",
1789 "the molecule is considered to be ring closed and will be processed as such.",
1790 "Please note that this does not detect cyclic bonds over periodic boundaries."
1793 settings->setHelpText(desc);
1795 options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1796 "Write the residue database in new format to [TT]new.rtp[tt]"));
1798 RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1800 RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1801 options->addOption(EnumOption<ChainSeparationType>("chainsep")
1802 .enumValue(c_chainSeparationTypeNames)
1803 .store(&chainSeparation_)
1804 .description("Condition in PDB files when a new chain should be "
1805 "started (adding termini)"));
1806 options->addOption(EnumOption<MergeType>("merge")
1807 .enumValue(c_mergeTypeNames)
1809 .description("Merge multiple chains into a single [moleculetype]"));
1810 options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1811 "Force field, interactive by default. Use [TT]-h[tt] for information."));
1813 EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1814 options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1815 "Set the next 8 options to interactive"));
1816 options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1817 "Interactive SS bridge selection"));
1818 options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1819 "Interactive termini selection, instead of charged (default)"));
1820 options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1821 "Interactive lysine selection, instead of charged"));
1822 options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1823 "Interactive arginine selection, instead of charged"));
1824 options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1825 "Interactive aspartic acid selection, instead of charged"));
1826 options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1827 "Interactive glutamic acid selection, instead of charged"));
1828 options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1829 "Interactive glutamine selection, instead of charged"));
1830 options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1831 "Interactive histidine selection, instead of checking H-bonds"));
1832 options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1833 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1835 RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1836 options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1837 "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1839 options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1840 "Sort the residues according to database, turning this off is dangerous as charge "
1841 "groups might be broken in parts"));
1843 BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1845 BooleanOption("missing")
1846 .store(&bAllowMissing_)
1847 .defaultValue(false)
1849 "Continue when atoms are missing and bonds cannot be made, dangerous"));
1851 BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1853 RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1854 options->addOption(EnumOption<VSitesType>("vsite")
1855 .store(&vsitesType_)
1856 .enumValue(c_vsitesTypeNames)
1857 .description("Convert atoms to virtual sites"));
1858 options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1859 "Make hydrogen atoms heavy"));
1861 BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1862 options->addOption(BooleanOption("chargegrp")
1863 .store(&bChargeGroups_)
1865 .description("Use charge groups in the [REF].rtp[ref] file"));
1866 options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1867 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1868 options->addOption(BooleanOption("renum")
1870 .defaultValue(false)
1871 .description("Renumber the residues consecutively in the output"));
1872 options->addOption(BooleanOption("rtpres")
1873 .store(&bRTPresname_)
1874 .defaultValue(false)
1875 .description("Use [REF].rtp[ref] entry names as residue names"));
1876 options->addOption(FileNameOption("f")
1879 .store(&inputConfFile_)
1881 .defaultBasename("protein")
1883 .description("Structure file"));
1884 options->addOption(FileNameOption("o")
1887 .store(&outputConfFile_)
1889 .defaultBasename("conf")
1890 .description("Structure file"));
1891 options->addOption(FileNameOption("p")
1894 .store(&topologyFile_)
1896 .defaultBasename("topol")
1897 .description("Topology file"));
1898 options->addOption(FileNameOption("i")
1901 .store(&includeTopologyFile_)
1903 .defaultBasename("posre")
1904 .description("Include file for topology"));
1905 options->addOption(FileNameOption("n")
1908 .store(&indexOutputFile_)
1909 .storeIsSet(&bIndexSet_)
1910 .defaultBasename("index")
1911 .description("Index file"));
1912 options->addOption(FileNameOption("q")
1916 .storeIsSet(&bOutputSet_)
1917 .defaultBasename("clean")
1919 .description("Structure file"));
1922 void pdb2gmx::optionsFinished()
1924 if (inputConfFile_.empty())
1926 GMX_THROW(InconsistentInputError("You must supply an input file"));
1930 /* if anything changes here, also change description of -inter */
1945 else if (bDeuterate_)
1954 /* Force field selection, interactive or direct */
1955 choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(),
1957 sizeof(forcefield_),
1960 loggerOwner_->logger());
1962 if (strlen(forcefield_) > 0)
1964 ffname_ = forcefield_;
1965 ffname_[0] = std::toupper(ffname_[0]);
1969 gmx_fatal(FARGS, "Empty forcefield string");
1975 char select[STRLEN];
1976 std::vector<DisulfideBond> ssbonds;
1980 const char* prev_atomname;
1981 const char* this_atomname;
1982 const char* prev_resname;
1983 const char* this_resname;
1988 int prev_chainnumber;
1989 int this_chainnumber;
1990 int this_chainstart;
1991 int prev_chainstart;
1993 const gmx::MDLogger& logger = loggerOwner_->logger();
1995 GMX_LOG(logger.info)
1997 .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1999 choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
2001 switch (vsitesType_)
2003 case VSitesType::None:
2005 bVsiteAromatics_ = false;
2007 case VSitesType::Hydrogens:
2009 bVsiteAromatics_ = false;
2011 case VSitesType::Aromatics:
2013 bVsiteAromatics_ = true;
2015 case VSitesType::Count:
2016 gmx_fatal(FARGS, "Internal inconsistency: VSitesType is out of range.");
2019 /* Open the symbol table */
2021 open_symtab(&symtab);
2023 /* Residue type database */
2024 ResidueTypeMap residueTypeMap = residueTypeMapFromLibraryFile("residuetypes.dat");
2026 /* Read residue renaming database(s), if present */
2027 std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
2029 std::vector<RtpRename> rtprename;
2030 for (const auto& filename : rrn)
2032 GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
2033 FILE* fp = fflib_open(filename);
2034 read_rtprename(filename.c_str(), fp, &rtprename);
2038 /* Add all alternative names from the residue renaming database to the list
2039 of recognized amino/nucleic acids. */
2040 for (const auto& rename : rtprename)
2042 /* Only add names if the 'standard' gromacs/iupac base name was found */
2043 if (auto foundIt = residueTypeMap.find(rename.gmx); foundIt != residueTypeMap.end())
2045 // Add the renamed forms with the same residue type as the standard form
2046 auto& residueType = foundIt->second;
2047 addResidue(&residueTypeMap, rename.main, residueType);
2048 addResidue(&residueTypeMap, rename.nter, residueType);
2049 addResidue(&residueTypeMap, rename.cter, residueType);
2050 addResidue(&residueTypeMap, rename.bter, residueType);
2057 if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
2061 else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
2071 char* title = nullptr;
2075 int natom = read_pdball(inputConfFile_.c_str(),
2092 std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
2093 GMX_THROW(InconsistentInputError(message));
2096 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
2097 int nwaterchain = 0;
2099 modify_chain_numbers(&pdba_all, chainSeparation_, logger);
2101 int nchainmerges = 0;
2103 this_atomname = nullptr;
2105 this_resname = nullptr;
2108 this_chainnumber = -1;
2109 this_chainstart = 0;
2110 /* Keep the compiler happy */
2111 prev_chainstart = 0;
2114 std::vector<t_pdbchain> pdb_ch;
2117 bool bMerged = false;
2118 for (int i = 0; (i < natom); i++)
2120 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
2122 /* TODO this should live in a helper object, and consolidate
2123 that with code in modify_chain_numbers */
2124 prev_atomname = this_atomname;
2125 prev_atomnum = this_atomnum;
2126 prev_resname = this_resname;
2127 prev_resnum = this_resnum;
2128 prev_chainid = this_chainid;
2129 prev_chainnumber = this_chainnumber;
2132 prev_chainstart = this_chainstart;
2135 this_atomname = *pdba_all.atomname[i];
2136 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
2137 this_resname = *ri->name;
2138 this_resnum = ri->nr;
2139 this_chainid = ri->chainid;
2140 this_chainnumber = ri->chainnum;
2142 bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
2144 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
2148 "Must have pdbinfo from reading a PDB file if chain number is changing");
2149 this_chainstart = pdba_all.atom[i].resind;
2151 if (i > 0 && !bWat_)
2153 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
2155 GMX_LOG(logger.info)
2157 .appendTextFormatted(
2158 "Merge chain ending with residue %s%d (chain id '%c', atom %d "
2159 "%s) and chain starting with "
2160 "residue %s%d (chain id '%c', atom %d %s) into a single "
2161 "moleculetype (keeping termini)? [n/y]",
2173 if (nullptr == fgets(select, STRLEN - 1, stdin))
2175 gmx_fatal(FARGS, "Error reading from stdin");
2177 bMerged = (select[0] == 'y');
2179 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
2187 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
2188 pdba_all.atom[i].resind - prev_chainstart;
2189 pdb_ch[numChains - 1].nterpairs++;
2190 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
2195 /* set natom for previous chain */
2198 pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
2205 /* check if chain identifier was used before */
2206 for (int j = 0; (j < numChains); j++)
2208 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
2210 GMX_LOG(logger.warning)
2212 .appendTextFormatted(
2213 "Chain identifier '%c' is used in two non-sequential "
2215 "They will be treated as separate chains unless you "
2216 "reorder your file.",
2220 t_pdbchain newChain;
2221 newChain.chainid = ri->chainid;
2222 newChain.chainnum = ri->chainnum;
2224 newChain.bAllWat = bWat_;
2227 newChain.nterpairs = 0;
2231 newChain.nterpairs = 1;
2233 newChain.chainstart.resize(newChain.nterpairs + 1);
2234 /* modified [numChains] to [0] below */
2235 newChain.chainstart[0] = 0;
2236 pdb_ch.push_back(newChain);
2242 pdb_ch.back().natom = natom - pdb_ch.back().start;
2244 /* set all the water blocks at the end of the chain */
2245 std::vector<int> swap_index(numChains);
2247 for (int i = 0; i < numChains; i++)
2249 if (!pdb_ch[i].bAllWat)
2255 for (int i = 0; i < numChains; i++)
2257 if (pdb_ch[i].bAllWat)
2263 if (nwaterchain > 1)
2265 GMX_LOG(logger.info)
2267 .appendTextFormatted("Moved all the water blocks to the end");
2271 std::vector<t_chain> chains(numChains);
2272 /* copy pdb data and x for all chains */
2273 for (int i = 0; (i < numChains); i++)
2275 int si = swap_index[i];
2276 chains[i].chainid = pdb_ch[si].chainid;
2277 chains[i].chainnum = pdb_ch[si].chainnum;
2278 chains[i].bAllWat = pdb_ch[si].bAllWat;
2279 chains[i].nterpairs = pdb_ch[si].nterpairs;
2280 chains[i].chainstart = pdb_ch[si].chainstart;
2281 chains[i].ntdb.clear();
2282 chains[i].ctdb.clear();
2283 chains[i].r_start.resize(pdb_ch[si].nterpairs);
2284 chains[i].r_end.resize(pdb_ch[si].nterpairs);
2286 snew(chains[i].pdba, 1);
2287 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2288 for (j = 0; j < chains[i].pdba->nr; j++)
2290 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start + j];
2291 chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2292 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2293 chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2295 /* Re-index the residues assuming that the indices are continuous */
2296 int k = chains[i].pdba->atom[0].resind;
2297 int nres = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2298 chains[i].pdba->nres = nres;
2299 for (int j = 0; j < chains[i].pdba->nr; j++)
2301 chains[i].pdba->atom[j].resind -= k;
2303 srenew(chains[i].pdba->resinfo, nres);
2304 for (int j = 0; j < nres; j++)
2306 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k + j];
2307 chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2308 /* make all chain identifiers equal to that of the chain */
2309 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2313 if (nchainmerges > 0)
2315 GMX_LOG(logger.info)
2317 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2321 GMX_LOG(logger.info)
2323 .appendTextFormatted(
2324 "There are %d chains and %d blocks of water and "
2325 "%d residues with %d atoms",
2326 numChains - nwaterchain,
2331 GMX_LOG(logger.info)
2333 .appendTextFormatted(" %5s %4s %6s", "chain", "#res", "#atoms");
2334 for (int i = 0; (i < numChains); i++)
2336 GMX_LOG(logger.info)
2338 .appendTextFormatted(" %d '%c' %5d %6d %s\n",
2340 chains[i].chainid ? chains[i].chainid : '-',
2341 chains[i].pdba->nres,
2343 chains[i].bAllWat ? "(only water)" : "");
2346 check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2348 /* Read atomtypes... */
2349 PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2351 /* read residue database */
2352 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2353 std::vector<std::string> rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2354 std::vector<PreprocessResidue> rtpFFDB;
2355 for (const auto& filename : rtpf)
2357 readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2361 /* Not correct with multiple rtp input files with different bonded types */
2362 FILE* fp = gmx_fio_fopen("new.rtp", "w");
2363 print_resall(fp, rtpFFDB, atype);
2367 /* read hydrogen database */
2368 std::vector<MoleculePatchDatabase> ah;
2369 read_h_db(ffdir_, &ah);
2371 /* Read Termini database... */
2372 std::vector<MoleculePatchDatabase> ntdb;
2373 std::vector<MoleculePatchDatabase> ctdb;
2374 std::vector<MoleculePatchDatabase*> tdblist;
2375 int nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2376 int nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2378 FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2380 print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2383 std::vector<gmx::RVec> x;
2384 /* new pdb datastructure for sorting. */
2385 t_atoms** sortAtoms = nullptr;
2386 t_atoms** localAtoms = nullptr;
2387 snew(sortAtoms, numChains);
2388 snew(localAtoms, numChains);
2389 for (int chain = 0; (chain < numChains); chain++)
2391 cc = &(chains[chain]);
2393 /* set pdba, natom and nres to the current chain */
2396 natom = cc->pdba->nr;
2397 int nres = cc->pdba->nres;
2399 if (cc->chainid && (cc->chainid != ' '))
2401 GMX_LOG(logger.info)
2403 .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2411 GMX_LOG(logger.info)
2413 .appendTextFormatted(
2414 "Processing chain %d (%d atoms, %d residues)", chain + 1, natom, nres);
2417 process_chain(logger,
2434 cc->chainstart[cc->nterpairs] = pdba->nres;
2436 for (int i = 0; i < cc->nterpairs; i++)
2440 cc->chainstart[i + 1],
2445 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2447 if (checkChainCyclicity(
2448 pdba, pdbx, cc->r_start[j], cc->r_end[j], rtpFFDB, rtprename, long_bond_dist_, short_bond_dist_))
2450 cc->cyclicBondsIndex.push_back(cc->r_start[j]);
2451 cc->cyclicBondsIndex.push_back(cc->r_end[j]);
2452 cc->r_start[j] = -1;
2462 if (cc->nterpairs == 0 && cc->cyclicBondsIndex.empty())
2464 GMX_LOG(logger.info)
2466 .appendTextFormatted(
2467 "Problem with chain definition, or missing terminal residues. "
2468 "This chain does not appear to contain a recognized chain molecule. "
2469 "If this is incorrect, you can edit residuetypes.dat to modify the "
2473 /* Check for disulfides and other special bonds */
2474 ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2476 if (!rtprename.empty())
2478 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2481 for (int i = 0; i < cc->nterpairs; i++)
2484 * We first apply a filter so we only have the
2485 * termini that can be applied to the residue in question
2486 * (or a generic terminus if no-residue specific is available).
2488 /* First the N terminus */
2491 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2492 if (tdblist.empty())
2494 GMX_LOG(logger.info)
2496 .appendTextFormatted(
2497 "No suitable end (N or 5') terminus found in database - "
2498 "assuming this residue "
2499 "is already in a terminus-specific form and skipping terminus "
2501 cc->ntdb.push_back(nullptr);
2505 if (bTerMan_ && !tdblist.empty())
2508 "Select start terminus type for %s-%d",
2509 *pdba->resinfo[cc->r_start[i]].name,
2510 pdba->resinfo[cc->r_start[i]].nr);
2511 cc->ntdb.push_back(choose_ter(tdblist, select));
2515 cc->ntdb.push_back(tdblist[0]);
2518 printf("Start terminus %s-%d: %s\n",
2519 *pdba->resinfo[cc->r_start[i]].name,
2520 pdba->resinfo[cc->r_start[i]].nr,
2521 (cc->ntdb[i])->name.c_str());
2527 cc->ntdb.push_back(nullptr);
2530 /* And the C terminus */
2533 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2534 if (tdblist.empty())
2536 GMX_LOG(logger.info)
2538 .appendTextFormatted(
2539 "No suitable end (C or 3') terminus found in database - "
2540 "assuming this residue"
2541 "is already in a terminus-specific form and skipping terminus "
2543 cc->ctdb.push_back(nullptr);
2547 if (bTerMan_ && !tdblist.empty())
2550 "Select end terminus type for %s-%d",
2551 *pdba->resinfo[cc->r_end[i]].name,
2552 pdba->resinfo[cc->r_end[i]].nr);
2553 cc->ctdb.push_back(choose_ter(tdblist, select));
2557 cc->ctdb.push_back(tdblist[0]);
2559 printf("End terminus %s-%d: %s\n",
2560 *pdba->resinfo[cc->r_end[i]].name,
2561 pdba->resinfo[cc->r_end[i]].nr,
2562 (cc->ctdb[i])->name.c_str());
2568 cc->ctdb.push_back(nullptr);
2571 std::vector<MoleculePatchDatabase> hb_chain;
2572 /* lookup hackblocks and rtp for all residues */
2573 std::vector<PreprocessResidue> restp_chain;
2574 get_hackblocks_rtp(&hb_chain,
2587 /* ideally, now we would not need the rtp itself anymore, but do
2588 everything using the hb and restp arrays. Unfortunately, that
2589 requires some re-thinking of code in gen_vsite.c, which I won't
2590 do now :( AF 26-7-99 */
2592 rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, residueTypeMap, false, bVerbose_);
2594 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2599 t_blocka* block = new_blocka();
2601 sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2602 remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2607 GMX_LOG(logger.warning)
2609 .appendTextFormatted(
2610 "With the -remh option the generated "
2611 "index file (%s) might be useless "
2612 "(the index file is generated before hydrogens are added)",
2613 indexOutputFile_.c_str());
2615 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2617 for (int i = 0; i < block->nr; i++)
2627 GMX_LOG(logger.warning)
2629 .appendTextFormatted(
2630 "Without sorting no check for duplicate atoms can be done");
2633 /* Generate Hydrogen atoms (and termini) in the sequence */
2634 GMX_LOG(logger.info)
2636 .appendTextFormatted(
2637 "Generating any missing hydrogen atoms and/or adding termini.");
2649 cc->cyclicBondsIndex);
2650 GMX_LOG(logger.info)
2652 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2654 /* make up molecule name(s) */
2656 int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2658 std::string restype = typeOfNamedDatabaseResidue(residueTypeMap, *pdba->resinfo[k].name);
2660 std::string molname;
2668 this_chainid = cc->chainid;
2670 /* Add the chain id if we have one */
2671 if (this_chainid != ' ')
2673 suffix.append(formatString("_chain_%c", this_chainid));
2676 /* Check if there have been previous chains with the same id */
2678 for (int k = 0; k < chain; k++)
2680 if (cc->chainid == chains[k].chainid)
2685 /* Add the number for this chain identifier if there are multiple copies */
2688 suffix.append(formatString("%d", nid_used + 1));
2691 if (suffix.length() > 0)
2693 molname.append(restype);
2694 molname.append(suffix);
2701 std::string itp_fn = topologyFile_;
2703 std::string posre_fn = includeTopologyFile_;
2704 if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2707 printf("Chain time...\n");
2708 // construct the itp file name
2709 itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2711 itp_fn.append(molname);
2712 itp_fn.append(".itp");
2713 // now do the same for posre
2714 posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2715 posre_fn.append("_");
2716 posre_fn.append(molname);
2717 posre_fn.append(".itp");
2718 if (posre_fn == itp_fn)
2720 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2722 incls_.emplace_back();
2723 incls_.back() = itp_fn;
2724 itp_file_ = gmx_fio_fopen(itp_fn.c_str(), "w");
2731 mols_.emplace_back();
2734 mols_.back().name = "SOL";
2735 mols_.back().nr = pdba->nres;
2739 mols_.back().name = molname;
2740 mols_.back().nr = 1;
2745 print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2751 top_file2 = nullptr;
2755 top_file2 = itp_file_;
2759 top_file2 = top_file;
2785 cc->cyclicBondsIndex,
2790 write_posres(posre_fn.c_str(), pdba, posre_fc_);
2795 gmx_fio_fclose(itp_file_);
2798 /* pdba and natom have been reassigned somewhere so: */
2803 if (watermodel_ == nullptr)
2805 for (int chain = 0; chain < numChains; chain++)
2807 if (chains[chain].bAllWat)
2809 auto message = formatString(
2810 "You have chosen not to include a water model, "
2811 "but there is water in the input file. Select a "
2812 "water model or remove the water from your input file.");
2813 GMX_THROW(InconsistentInputError(message));
2819 std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2820 if (!fflib_fexist(waterFile))
2822 auto message = formatString(
2823 "The topology file '%s' for the selected water "
2824 "model '%s' can not be found in the force field "
2825 "directory. Select a different water model.",
2828 GMX_THROW(InconsistentInputError(message));
2832 print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2833 gmx_fio_fclose(top_file);
2835 /* now merge all chains back together */
2838 for (int i = 0; (i < numChains); i++)
2840 natom += chains[i].pdba->nr;
2841 nres += chains[i].pdba->nres;
2845 init_t_atoms(atoms, natom, false);
2846 for (int i = 0; i < atoms->nres; i++)
2848 sfree(atoms->resinfo[i].name);
2851 srenew(atoms->resinfo, nres);
2855 for (int i = 0; (i < numChains); i++)
2859 GMX_LOG(logger.info)
2861 .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2864 chains[i].pdba->nres);
2866 for (int j = 0; (j < chains[i].pdba->nr); j++)
2868 atoms->atom[k] = chains[i].pdba->atom[j];
2869 atoms->atom[k].resind += l; /* l is processed nr of residues */
2870 atoms->atomname[k] = chains[i].pdba->atomname[j];
2871 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2872 x.push_back(chains[i].x[j]);
2875 for (int j = 0; (j < chains[i].pdba->nres); j++)
2877 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2880 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2884 done_atom(chains[i].pdba);
2889 GMX_LOG(logger.info)
2891 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2892 print_sums(atoms, true, logger);
2896 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2897 clear_rvec(box_space);
2900 make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2903 outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr, pbcType, box);
2905 done_symtab(&symtab);
2906 done_atom(&pdba_all);
2908 for (int chain = 0; chain < numChains; chain++)
2910 sfree(sortAtoms[chain]);
2911 sfree(localAtoms[chain]);
2919 GMX_LOG(logger.info)
2921 .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2922 GMX_LOG(logger.info)
2924 .appendTextFormatted("You have successfully generated a topology from: %s.",
2925 inputConfFile_.c_str());
2926 if (watermodel_ != nullptr)
2928 GMX_LOG(logger.info)
2930 .appendTextFormatted(
2931 "The %s force field and the %s water model are used.", ffname_, watermodel_);
2936 GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2938 GMX_LOG(logger.info)
2940 .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2947 const char pdb2gmxInfo::name[] = "pdb2gmx";
2948 const char pdb2gmxInfo::shortDescription[] =
2949 "Convert coordinate files to topology and FF-compliant coordinate files";
2950 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2952 return std::make_unique<pdb2gmx>();