2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "gromacs/utility/enumerationhelpers.h"
53 #include "gromacs/commandline/cmdlineoptionsmodule.h"
54 #include "gromacs/fileio/confio.h"
55 #include "gromacs/fileio/filetypes.h"
56 #include "gromacs/fileio/gmxfio.h"
57 #include "gromacs/fileio/pdbio.h"
58 #include "gromacs/gmxlib/conformation_utilities.h"
59 #include "gromacs/gmxpreprocess/fflibutil.h"
60 #include "gromacs/gmxpreprocess/genhydro.h"
61 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
62 #include "gromacs/gmxpreprocess/grompp_impl.h"
63 #include "gromacs/gmxpreprocess/h_db.h"
64 #include "gromacs/gmxpreprocess/hizzie.h"
65 #include "gromacs/gmxpreprocess/pdb2top.h"
66 #include "gromacs/gmxpreprocess/pgutil.h"
67 #include "gromacs/gmxpreprocess/specbond.h"
68 #include "gromacs/gmxpreprocess/ter_db.h"
69 #include "gromacs/gmxpreprocess/toputil.h"
70 #include "gromacs/gmxpreprocess/xlate.h"
71 #include "gromacs/math/vec.h"
72 #include "gromacs/options/basicoptions.h"
73 #include "gromacs/options/filenameoption.h"
74 #include "gromacs/options/ioptionscontainer.h"
75 #include "gromacs/topology/atomprop.h"
76 #include "gromacs/topology/block.h"
77 #include "gromacs/topology/index.h"
78 #include "gromacs/topology/residuetypes.h"
79 #include "gromacs/topology/symtab.h"
80 #include "gromacs/topology/topology.h"
81 #include "gromacs/utility/dir_separator.h"
82 #include "gromacs/utility/exceptions.h"
83 #include "gromacs/utility/fatalerror.h"
84 #include "gromacs/utility/filestream.h"
85 #include "gromacs/utility/loggerbuilder.h"
86 #include "gromacs/utility/path.h"
87 #include "gromacs/utility/smalloc.h"
88 #include "gromacs/utility/strdb.h"
89 #include "gromacs/utility/stringutil.h"
91 #include "hackblock.h"
96 RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
114 const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
116 /* NOTE: This function returns the main building block name,
117 * it does not take terminal renaming into account.
119 auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
120 return gmx::equalCaseInsensitive(name, rename.gmx);
122 return found != rr.end() ? found->main.c_str() : name.c_str();
125 const char* enumValueToLongString(HistidineStates enumValue)
127 constexpr gmx::EnumerationArray<HistidineStates, const char*> histidineStatesLongNames = {
128 "H on ND1 only", "H on NE2 only", "H on ND1 and NE2", "Coupled to Heme"
130 return histidineStatesLongNames[enumValue];
133 enum class AspartateStates : int
140 const char* enumValueToString(AspartateStates enumValue)
142 constexpr gmx::EnumerationArray<AspartateStates, const char*> aspartateStateNames = { "ASP",
144 return aspartateStateNames[enumValue];
147 const char* enumValueToLongString(AspartateStates enumValue)
149 constexpr gmx::EnumerationArray<AspartateStates, const char*> aspartateStateLongNames = {
150 "Not protonated (charge -1)", "Protonated (charge 0)"
152 return aspartateStateLongNames[enumValue];
155 enum class GlutamateStates : int
162 const char* enumValueToString(GlutamateStates enumValue)
164 constexpr gmx::EnumerationArray<GlutamateStates, const char*> glutamateStateNames = { "GLU",
166 return glutamateStateNames[enumValue];
169 const char* enumValueToLongString(GlutamateStates enumValue)
171 constexpr gmx::EnumerationArray<GlutamateStates, const char*> glutamateStateLongNames = {
172 "Not protonated (charge -1)", "Protonated (charge 0)"
174 return glutamateStateLongNames[enumValue];
177 enum class GlutamineStates : int
184 const char* enumValueToString(GlutamineStates enumValue)
186 constexpr gmx::EnumerationArray<GlutamineStates, const char*> glutamineStateNames = { "GLN",
188 return glutamineStateNames[enumValue];
191 const char* enumValueToLongString(GlutamineStates enumValue)
193 constexpr gmx::EnumerationArray<GlutamineStates, const char*> glutamineStateLongNames = {
194 "Not protonated (charge 0)", "Protonated (charge +1)"
196 return glutamineStateLongNames[enumValue];
199 enum class LysineStates : int
206 const char* enumValueToString(LysineStates enumValue)
208 constexpr gmx::EnumerationArray<LysineStates, const char*> lysineStateNames = { "LYSN", "LYS" };
209 return lysineStateNames[enumValue];
212 const char* enumValueToLongString(LysineStates enumValue)
214 constexpr gmx::EnumerationArray<LysineStates, const char*> lysineStateLongNames = {
215 "Not protonated (charge 0)", "Protonated (charge +1)"
217 return lysineStateLongNames[enumValue];
220 enum class ArginineStates : int
227 const char* enumValueToString(ArginineStates enumValue)
229 constexpr gmx::EnumerationArray<ArginineStates, const char*> arginineStatesNames = { "ARGN",
231 return arginineStatesNames[enumValue];
234 const char* enumValueToLongString(ArginineStates enumValue)
236 constexpr gmx::EnumerationArray<ArginineStates, const char*> arginineStatesLongNames = {
237 "Not protonated (charge 0)", "Protonated (charge +1)"
239 return arginineStatesLongNames[enumValue];
242 template<typename EnumType>
243 const char* select_res(int resnr, const char* title, gmx::ArrayRef<const RtpRename> rr)
245 printf("Which %s type do you want for residue %d\n", title, resnr + 1);
246 for (auto sel : gmx::EnumerationWrapper<EnumType>{})
248 printf("%d. %s (%s)\n",
249 static_cast<int>(sel),
250 enumValueToString(sel),
251 res2bb_notermini(enumValueToString(sel), rr));
253 printf("\nType a number:");
257 if (scanf("%d", &userSelection) != 1)
259 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
262 return enumValueToLongString(static_cast<EnumType>(userSelection));
265 const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
267 return select_res<AspartateStates>(resnr, "ASPARTIC ACID", rr);
270 const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
272 return select_res<GlutamateStates>(resnr, "GLUTAMIC ACID", rr);
275 const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
277 return select_res<GlutamineStates>(resnr, "GLUTAMINE", rr);
280 const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
282 return select_res<LysineStates>(resnr, "LYSINE", rr);
285 const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
287 return select_res<ArginineStates>(resnr, "ARGININE", rr);
290 const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
292 return select_res<HistidineStates>(resnr, "HISTIDINE", rr);
295 void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
297 char line[STRLEN], buf[STRLEN];
300 while (get_a_line(fp, line, STRLEN))
302 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
303 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
304 * Note that the buffer length has been increased to 7 to allow this,
305 * so we just need to make sure the strings have zero-length initially.
317 int nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
318 RtpRename newEntry(gmx, main, nter, cter, bter);
321 if (nc != 2 && nc != 5)
324 "Residue renaming database '%s' has %d columns instead of %d or %d",
335 "A line in residue renaming database '%s' has %d columns, while previous "
336 "lines have %d columns",
344 /* This file does not have special termini names, copy them from main */
345 newEntry.nter = newEntry.main;
346 newEntry.cter = newEntry.main;
347 newEntry.bter = newEntry.main;
349 rtprename->push_back(newEntry);
353 std::string search_resrename(gmx::ArrayRef<const RtpRename> rr, const char* name, bool bStart, bool bEnd, bool bCompareFFRTPname)
355 auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
356 return ((!bCompareFFRTPname && (name == rename.gmx))
357 || (bCompareFFRTPname && (name == rename.main)));
361 /* If found in the database, rename this residue's rtp building block,
362 * otherwise keep the old name.
364 if (found != rr.end())
368 newName = found->bter;
372 newName = found->nter;
376 newName = found->cter;
380 newName = found->main;
383 if (newName[0] == '-')
386 "In the chosen force field there is no residue type for '%s'%s",
388 bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
389 : (bEnd ? " as an ending terminus" : ""));
396 void rename_resrtp(t_atoms* pdba,
398 gmx::ArrayRef<const int> r_start,
399 gmx::ArrayRef<const int> r_end,
400 gmx::ArrayRef<const RtpRename> rr,
403 const gmx::MDLogger& logger)
405 bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
407 for (int r = 0; r < pdba->nres; r++)
411 for (int j = 0; j < nterpairs; j++)
418 for (int j = 0; j < nterpairs; j++)
426 std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
428 if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
430 /* This is a terminal residue, but the residue name,
431 * currently stored in .rtp, is not a standard residue name,
432 * but probably a force field specific rtp name.
433 * Check if we need to rename it because it is terminal.
435 newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
438 if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
444 .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
446 *pdba->resinfo[r].name,
449 pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
454 void pdbres_to_gmxrtp(t_atoms* pdba)
458 for (i = 0; (i < pdba->nres); i++)
460 if (pdba->resinfo[i].rtp == nullptr)
462 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
467 void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
472 for (i = 0; (i < pdba->nres); i++)
474 resnm = *pdba->resinfo[i].name;
475 if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
476 || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
478 /* Rename the residue name (not the rtp name) */
479 pdba->resinfo[i].name = put_symtab(symtab, newnm);
484 /*! \brief Rename all residues named \c oldnm to \c newnm
486 * Search for residues for which the residue name from the input
487 * configuration file matches \c oldnm, and when found choose the rtp
488 * entry and name of \c newnm.
490 * \todo Refactor this function to accept a lambda that accepts i and
491 * numMatchesFound but always produces \c newnm. Then remove
492 * renameResiduesInteractively by calling this method with suitable
493 * lambdas that capture its parameter \c rr and ignores
494 * numMatchesFound. */
495 void renameResidue(const gmx::MDLogger& logger,
502 int numMatchesFound = 0;
503 for (int i = 0; (i < pdba->nres); i++)
505 /* We have not set the rtp name yet, use the residue name */
506 const char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
507 if ((bFullCompare && (gmx::equalCaseInsensitive(residueNameInInputConfiguration, oldnm)))
508 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
510 /* Change the rtp building block name */
511 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
512 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
516 if (numMatchesFound > 0)
520 .appendTextFormatted(
521 "Replaced %d residue%s named %s to the default %s. Use interactive "
522 "selection of protonated residues if that is what you need.",
524 numMatchesFound > 1 ? "s" : "",
530 /*! \brief Rename all residues named \c oldnm according to the user's
533 * Search for residues for which the residue name from the input
534 * configuration file matches \c oldnm, and when found choose the rtp
535 * entry and name of the interactive choice from \c gettp.
537 * \todo Remove this function, per todo in \c renameResidue. */
538 void renameResidueInteractively(t_atoms* pdba,
540 const char* gettp(int, gmx::ArrayRef<const RtpRename>),
543 gmx::ArrayRef<const RtpRename> rr)
545 // Search for residues i for which the residue name from the input
546 // configuration file matches oldnm, so it can replaced by the rtp
547 // entry and name of newnm.
548 for (int i = 0; i < pdba->nres; i++)
550 /* We have not set the rtp name yet, use the residue name */
551 char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
552 if ((bFullCompare && (strcmp(residueNameInInputConfiguration, oldnm) == 0))
553 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
555 const char* interactiveRtpChoice = gettp(i, rr);
556 pdba->resinfo[i].rtp = put_symtab(symtab, interactiveRtpChoice);
557 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
562 void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
568 ftp = fn2ftp(filename);
569 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
571 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
575 for (i = 0; (i < atoms->nr); i++)
577 if (atoms->pdbinfo[i].occup != 1)
581 GMX_LOG(logger.warning)
583 .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
584 *atoms->resinfo[atoms->atom[i].resind].name,
585 atoms->resinfo[atoms->atom[i].resind].nr,
587 atoms->pdbinfo[i].occup);
589 if (atoms->pdbinfo[i].occup == 0)
599 if (nzero == atoms->nr)
601 GMX_LOG(logger.warning)
603 .appendTextFormatted(
604 "All occupancy fields zero. This is probably not an X-Ray structure");
606 else if ((nzero > 0) || (nnotone > 0))
608 GMX_LOG(logger.warning)
610 .appendTextFormatted(
611 "there were %d atoms with zero occupancy and %d atoms with "
612 " occupancy unequal to one (out of %d atoms). Check your pdb "
620 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
625 void write_posres(const char* fn, t_atoms* pdba, real fc)
630 fp = gmx_fio_fopen(fn, "w");
632 "; In this topology include file, you will find position restraint\n"
633 "; entries for all the heavy atoms in your original pdb file.\n"
634 "; This means that all the protons which were added by pdb2gmx are\n"
635 "; not restrained.\n"
637 "[ position_restraints ]\n"
638 "; %4s%6s%8s%8s%8s\n",
644 for (i = 0; (i < pdba->nr); i++)
646 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
648 fprintf(fp, "%6d%6d %g %g %g\n", i + 1, 1, fc, fc, fc);
654 int read_pdball(const char* inf,
668 /* Read a pdb file. (containing proteins) */
670 int natom, new_natom, i;
673 printf("Reading %s...\n", inf);
674 readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
676 if (atoms->pdbinfo == nullptr)
678 snew(atoms->pdbinfo, atoms->nr);
680 if (fn2ftp(inf) == efPDB)
682 get_pdb_atomnumber(atoms, aps);
687 for (i = 0; i < atoms->nr; i++)
689 if (!is_hydrogen(*atoms->atomname[i]))
691 atoms->atom[new_natom] = atoms->atom[i];
692 atoms->atomname[new_natom] = atoms->atomname[i];
693 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
694 copy_rvec((*x)[i], (*x)[new_natom]);
698 atoms->nr = new_natom;
705 printf(" '%s',", *title);
707 printf(" %d atoms\n", natom);
709 /* Rename residues */
710 rename_pdbres(atoms, "HOH", watres, false, symtab);
711 rename_pdbres(atoms, "SOL", watres, false, symtab);
712 rename_pdbres(atoms, "WAT", watres, false, symtab);
714 rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose);
722 write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
728 void process_chain(const gmx::MDLogger& logger,
730 gmx::ArrayRef<gmx::RVec> x,
743 gmx::ArrayRef<const RtpRename> rr)
745 /* Rename aromatics, lys, asp and histidine */
748 renameResidue(logger, pdba, "TYR", "TYRU", false, symtab);
752 renameResidue(logger, pdba, "TRP", "TRPU", false, symtab);
756 renameResidue(logger, pdba, "PHE", "PHEU", false, symtab);
760 renameResidueInteractively(pdba, "LYS", get_lystp, false, symtab, rr);
764 renameResidueInteractively(pdba, "ARG", get_argtp, false, symtab, rr);
768 renameResidueInteractively(pdba, "GLN", get_glntp, false, symtab, rr);
772 renameResidueInteractively(pdba, "ASP", get_asptp, false, symtab, rr);
776 renameResidue(logger, pdba, "ASPH", "ASP", false, symtab);
780 renameResidueInteractively(pdba, "GLU", get_glutp, false, symtab, rr);
784 renameResidue(logger, pdba, "GLUH", "GLU", false, symtab);
789 set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
793 renameResidueInteractively(pdba, "HIS", get_histp, true, symtab, rr);
796 /* Initialize the rtp builing block names with the residue names
797 * for the residues that have not been processed above.
799 pdbres_to_gmxrtp(pdba);
801 /* Now we have all rtp names set.
802 * The rtp names will conform to Gromacs naming,
803 * unless the input pdb file contained one or more force field specific
804 * rtp names as residue names.
808 /* struct for sorting the atoms from the pdb file */
811 int resnr; /* residue number */
812 int j; /* database order index */
813 int index; /* original atom number */
814 char anm1; /* second letter of atom name */
815 char altloc; /* alternate location indicator */
818 bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
820 int d = (a.resnr - b.resnr);
826 d = (a.anm1 - b.anm1);
829 d = (a.altloc - b.altloc);
836 void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
839 t_atoms** newPdbAtoms,
840 std::vector<gmx::RVec>* x,
844 t_atoms* pdba = *pdbaptr;
845 std::vector<gmx::RVec> xnew;
852 for (int i = 0; i < natoms; i++)
854 atomnm = *pdba->atomname[i];
855 const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
857 std::find_if(localPpResidue->atomname.begin(),
858 localPpResidue->atomname.end(),
859 [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
860 if (found == localPpResidue->atomname.end())
862 std::string buf = gmx::formatString(
863 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
864 "while sorting atoms.\n%s",
866 *pdba->resinfo[pdba->atom[i].resind].name,
867 pdba->resinfo[pdba->atom[i].resind].nr,
868 localPpResidue->resname.c_str(),
869 localPpResidue->natom(),
871 ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
872 "might have had a different number in the PDB file and was rebuilt\n"
873 "(it might for instance have been H3, and we only expected H1 & "
875 "Note that hydrogens might have been added to the entry for the "
877 "Remove this hydrogen or choose a different protonation state to "
879 "Option -ignh will ignore all hydrogens in the input."
881 gmx_fatal(FARGS, "%s", buf.c_str());
883 /* make shadow array to be sorted into indexgroup */
884 pdbi[i].resnr = pdba->atom[i].resind;
885 pdbi[i].j = std::distance(localPpResidue->atomname.begin(), found);
887 pdbi[i].anm1 = atomnm[1];
888 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
890 std::sort(pdbi, pdbi + natoms, pdbicomp);
892 /* pdba is sorted in pdbnew using the pdbi index */
893 std::vector<int> a(natoms);
894 srenew(*newPdbAtoms, 1);
895 init_t_atoms((*newPdbAtoms), natoms, true);
896 (*newPdbAtoms)->nr = pdba->nr;
897 (*newPdbAtoms)->nres = pdba->nres;
898 srenew((*newPdbAtoms)->resinfo, pdba->nres);
899 std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
900 for (int i = 0; i < natoms; i++)
902 (*newPdbAtoms)->atom[i] = pdba->atom[pdbi[i].index];
903 (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
904 (*newPdbAtoms)->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
905 xnew.push_back(x->at(pdbi[i].index));
906 /* make indexgroup in block */
907 a[i] = pdbi[i].index;
912 /* copy the sorted pdbnew back to pdba */
913 *pdbaptr = *newPdbAtoms;
915 add_grp(block, gnames, a, "prot_sort");
919 int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
921 int i, j, oldnatoms, ndel;
924 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
925 oldnatoms = pdba->nr;
927 /* NOTE: pdba->nr is modified inside the loop */
928 for (i = 1; (i < pdba->nr); i++)
930 /* compare 'i' and 'i-1', throw away 'i' if they are identical
931 this is a 'while' because multiple alternate locations can be present */
932 while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
933 && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
938 ri = &pdba->resinfo[pdba->atom[i].resind];
941 .appendTextFormatted("deleting duplicate atom %4s %s%4d%c",
946 if (ri->chainid && (ri->chainid != ' '))
948 printf(" ch %c", ri->chainid);
952 if (pdba->pdbinfo[i].atomnr)
954 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
956 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
958 printf(" altloc %c", pdba->pdbinfo[i].altloc);
964 /* We can not free, since it might be in the symtab */
965 /* sfree(pdba->atomname[i]); */
966 for (j = i; j < pdba->nr; j++)
968 pdba->atom[j] = pdba->atom[j + 1];
969 pdba->atomname[j] = pdba->atomname[j + 1];
972 pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
974 copy_rvec(x[j + 1], x[j]);
976 srenew(pdba->atom, pdba->nr);
977 /* srenew(pdba->atomname, pdba->nr); */
978 srenew(pdba->pdbinfo, pdba->nr);
981 if (pdba->nr != oldnatoms)
985 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
991 void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
993 std::string startResidueString =
994 gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
995 std::string endResidueString =
996 gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
998 // Check whether all residues in chain have the same chain ID.
999 bool allResiduesHaveSameChainID = true;
1000 char chainID0 = pdba->resinfo[r0].chainid;
1002 std::string residueString;
1004 for (int i = r0 + 1; i < r1; i++)
1006 chainID = pdba->resinfo[i].chainid;
1007 if (chainID != chainID0)
1009 allResiduesHaveSameChainID = false;
1010 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1015 if (!allResiduesHaveSameChainID)
1018 "The chain covering the range %s--%s does not have a consistent chain ID. "
1019 "The first residue has ID '%c', while residue %s has ID '%c'.",
1020 startResidueString.c_str(),
1021 endResidueString.c_str(),
1023 residueString.c_str(),
1027 // At this point all residues have the same ID. If they are also non-blank
1028 // we can be a bit more aggressive and require the types match too.
1029 if (chainID0 != ' ')
1031 bool allResiduesHaveSameType = true;
1032 std::string restype;
1033 std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
1035 for (int i = r0 + 1; i < r1; i++)
1037 restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1038 if (!gmx::equalCaseInsensitive(restype, restype0))
1040 allResiduesHaveSameType = false;
1041 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1046 if (!allResiduesHaveSameType)
1049 "The residues in the chain %s--%s do not have a consistent type. "
1050 "The first residue has type '%s', while residue %s is of type '%s'. "
1051 "Either there is a mistake in your chain, or it includes nonstandard "
1052 "residue names that have not yet been added to the residuetypes.dat "
1053 "file in the GROMACS library directory. If there are other molecules "
1054 "such as ligands, they should not have the same chain ID as the "
1055 "adjacent protein chain since it's a separate molecule.",
1056 startResidueString.c_str(),
1057 endResidueString.c_str(),
1059 residueString.c_str(),
1065 void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
1068 std::optional<std::string> startrestype;
1073 int startWarnings = 0;
1074 int endWarnings = 0;
1077 // Check that all residues have the same chain identifier, and if it is
1078 // non-blank we also require the residue types to match.
1079 checkResidueTypeSanity(pdba, r0, r1, rt);
1081 // If we return correctly from checkResidueTypeSanity(), the only
1082 // remaining cases where we can have non-matching residue types is if
1083 // the chain ID was blank, which could be the case e.g. for a structure
1084 // read from a GRO file or other file types without chain information.
1085 // In that case we need to be a bit more liberal and detect chains based
1086 // on the residue type.
1088 // If we get here, the chain ID must be identical for all residues
1089 char chainID = pdba->resinfo[r0].chainid;
1091 /* Find the starting terminus (typially N or 5') */
1092 for (i = r0; i < r1 && *r_start == -1; i++)
1094 startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1099 if (gmx::equalCaseInsensitive(*startrestype, "Protein")
1100 || gmx::equalCaseInsensitive(*startrestype, "DNA")
1101 || gmx::equalCaseInsensitive(*startrestype, "RNA"))
1103 GMX_LOG(logger.info)
1105 .appendTextFormatted("Identified residue %s%d as a starting terminus.",
1106 *pdba->resinfo[i].name,
1107 pdba->resinfo[i].nr);
1110 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1114 GMX_LOG(logger.info)
1116 .appendTextFormatted(
1117 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1119 *pdba->resinfo[i].name,
1120 pdba->resinfo[i].nr);
1124 GMX_LOG(logger.info)
1126 .appendTextFormatted("Disabling further notes about ions.");
1132 // Either no known residue type, or one not needing special handling
1133 if (startWarnings < 5)
1137 GMX_LOG(logger.warning)
1139 .appendTextFormatted(
1140 "Starting residue %s%d in chain not identified as "
1142 "This chain lacks identifiers, which makes it impossible to do "
1144 "classification of the start/end residues. Here we need to "
1145 "guess this residue "
1146 "should not be part of the chain and instead introduce a "
1147 "break, but that will "
1148 "be catastrophic if they should in fact be linked. Please "
1149 "check your structure, "
1150 "and add %s to residuetypes.dat if this was not correct.",
1151 *pdba->resinfo[i].name,
1152 pdba->resinfo[i].nr,
1153 *pdba->resinfo[i].name);
1157 GMX_LOG(logger.warning)
1159 .appendTextFormatted(
1160 "No residues in chain starting at %s%d identified as "
1162 "This makes it impossible to link them into a molecule, which "
1164 "correct or a catastrophic error. Please check your structure, "
1166 "necessary residue names to residuetypes.dat if this was not "
1168 *pdba->resinfo[i].name,
1169 pdba->resinfo[i].nr);
1172 if (startWarnings == 4)
1174 GMX_LOG(logger.warning)
1176 .appendTextFormatted(
1177 "Disabling further warnings about unidentified residues at start "
1186 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1187 for (int i = *r_start; i < r1; i++)
1189 std::optional<std::string> restype =
1190 rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1195 if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
1199 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1203 GMX_LOG(logger.info)
1205 .appendTextFormatted(
1206 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1208 *pdba->resinfo[i].name,
1209 pdba->resinfo[i].nr);
1213 GMX_LOG(logger.info)
1215 .appendTextFormatted("Disabling further notes about ions.");
1221 // Either no known residue type, or one not needing special handling.
1222 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1223 // Otherwise the call to checkResidueTypeSanity() will
1224 // have caught the problem.
1225 if (endWarnings < 5)
1227 GMX_LOG(logger.warning)
1229 .appendTextFormatted(
1230 "Residue %s%d in chain has different type ('%s') from "
1231 "residue %s%d ('%s'). This chain lacks identifiers, which "
1233 "it impossible to do strict classification of the start/end "
1234 "residues. Here we "
1235 "need to guess this residue should not be part of the chain "
1237 "introduce a break, but that will be catastrophic if they "
1238 "should in fact be "
1239 "linked. Please check your structure, and add %s to "
1241 "if this was not correct.",
1242 *pdba->resinfo[i].name,
1243 pdba->resinfo[i].nr,
1245 *pdba->resinfo[*r_start].name,
1246 pdba->resinfo[*r_start].nr,
1247 startrestype->c_str(),
1248 *pdba->resinfo[i].name);
1250 if (endWarnings == 4)
1252 GMX_LOG(logger.warning)
1254 .appendTextFormatted(
1255 "Disabling further warnings about unidentified residues at end "
1265 GMX_LOG(logger.info)
1267 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1268 *pdba->resinfo[*r_end].name,
1269 pdba->resinfo[*r_end].nr);
1273 enum class ChainSeparationType : int
1282 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1283 { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1285 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1286 { "Splitting chemical chains based on TER records or chain id changing.\n",
1287 "Splitting chemical chains based on TER records and chain id changing.\n",
1288 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1289 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1290 "Splitting chemical chains interactively.\n" }
1293 void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1296 char old_prev_chainid;
1297 char old_this_chainid;
1298 int old_prev_chainnum;
1299 int old_this_chainnum;
1301 char select[STRLEN];
1305 const char* prev_atomname;
1306 const char* this_atomname;
1307 const char* prev_resname;
1308 const char* this_resname;
1314 /* The default chain enumeration is based on TER records only */
1315 printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1317 old_prev_chainid = '?';
1318 old_prev_chainnum = -1;
1321 this_atomname = nullptr;
1323 this_resname = nullptr;
1327 for (i = 0; i < pdba->nres; i++)
1329 ri = &pdba->resinfo[i];
1330 old_this_chainid = ri->chainid;
1331 old_this_chainnum = ri->chainnum;
1333 prev_atomname = this_atomname;
1334 prev_atomnum = this_atomnum;
1335 prev_resname = this_resname;
1336 prev_resnum = this_resnum;
1337 prev_chainid = this_chainid;
1339 this_atomname = *(pdba->atomname[i]);
1340 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1341 this_resname = *ri->name;
1342 this_resnum = ri->nr;
1343 this_chainid = ri->chainid;
1345 switch (chainSeparation)
1347 case ChainSeparationType::IdOrTer:
1348 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1354 case ChainSeparationType::IdAndTer:
1355 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1361 case ChainSeparationType::Id:
1362 if (old_this_chainid != old_prev_chainid)
1368 case ChainSeparationType::Ter:
1369 if (old_this_chainnum != old_prev_chainnum)
1374 case ChainSeparationType::Interactive:
1375 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1379 GMX_LOG(logger.info)
1381 .appendTextFormatted(
1382 "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1384 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1396 if (nullptr == fgets(select, STRLEN - 1, stdin))
1398 gmx_fatal(FARGS, "Error reading from stdin");
1401 if (i == 0 || select[0] == 'y')
1407 case ChainSeparationType::Count:
1408 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1410 old_prev_chainid = old_this_chainid;
1411 old_prev_chainnum = old_this_chainnum;
1413 ri->chainnum = new_chainnum;
1417 bool checkChainCyclicity(t_atoms* pdba,
1421 gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1422 gmx::ArrayRef<const RtpRename> rr,
1423 real long_bond_dist_,
1424 real short_bond_dist_)
1426 int ai = -1, aj = -1;
1427 char* rtpname = *(pdba->resinfo[start_ter].rtp);
1428 std::string newName = search_resrename(rr, rtpname, false, false, false);
1429 if (newName.empty())
1433 auto res = getDatabaseEntry(newName, rtpFFDB);
1434 const char *name_ai, *name_aj;
1436 for (const auto& patch : res->rb[BondedTypes::Bonds].b)
1437 { /* Search backward bond for n/5' terminus */
1438 name_ai = patch.ai().c_str();
1439 name_aj = patch.aj().c_str();
1440 if (name_ai[0] == '-')
1442 aj = search_res_atom(++name_ai, end_ter, pdba, "check", TRUE);
1443 ai = search_res_atom(name_aj, start_ter, pdba, "check", TRUE);
1445 else if (name_aj[0] == '-')
1447 aj = search_res_atom(++name_aj, end_ter, pdba, "check", TRUE);
1448 ai = search_res_atom(name_ai, start_ter, pdba, "check", TRUE);
1450 if (ai >= 0 && aj >= 0)
1456 if (!(ai >= 0 && aj >= 0))
1458 rtpname = *(pdba->resinfo[end_ter].rtp);
1459 newName = search_resrename(rr, rtpname, false, false, false);
1460 if (newName.empty())
1464 res = getDatabaseEntry(newName, rtpFFDB);
1465 for (const auto& patch : res->rb[BondedTypes::Bonds].b)
1467 /* Seach forward bond for c/3' terminus */
1468 name_ai = patch.ai().c_str();
1469 name_aj = patch.aj().c_str();
1471 if (name_ai[0] == '+')
1473 ai = search_res_atom(name_aj, end_ter, pdba, "check", TRUE);
1474 aj = search_res_atom(++name_ai, start_ter, pdba, "check", TRUE);
1476 else if (name_aj[0] == '+')
1478 ai = search_res_atom(name_ai, end_ter, pdba, "check", TRUE);
1479 aj = search_res_atom(++name_aj, start_ter, pdba, "check", TRUE);
1481 if (ai >= 0 && aj >= 0)
1488 if (ai >= 0 && aj >= 0)
1490 real dist = distance2(pdbx[ai], pdbx[aj]);
1491 /* it is better to read bond length from ffbonded.itp */
1492 return (dist < gmx::square(long_bond_dist_) && dist > gmx::square(short_bond_dist_));
1503 int chainnum = ' '; // char, but stored as int to make clang-tidy happy
1506 bool bAllWat = false;
1508 std::vector<int> chainstart;
1515 bool bAllWat = false;
1517 std::vector<int> chainstart;
1518 std::vector<MoleculePatchDatabase*> ntdb;
1519 std::vector<MoleculePatchDatabase*> ctdb;
1520 std::vector<int> r_start;
1521 std::vector<int> r_end;
1523 std::vector<gmx::RVec> x;
1524 std::vector<int> cyclicBondsIndex;
1527 enum class VSitesType : int
1534 const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = {
1535 { "none", "hydrogens", "aromatics" }
1538 enum class WaterType : int
1550 const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1551 { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1554 enum class MergeType : int
1561 const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = {
1562 { "no", "all", "interactive" }
1573 class pdb2gmx : public ICommandLineOptionsModule
1579 bVsiteAromatics_(FALSE),
1580 chainSeparation_(ChainSeparationType::IdOrTer),
1581 vsitesType_(VSitesType::None),
1582 waterType_(WaterType::Select),
1583 mergeType_(MergeType::No),
1587 gmx::LoggerBuilder builder;
1588 builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1589 builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1590 loggerOwner_ = std::make_unique<LoggerOwner>(builder.build());
1593 // From ICommandLineOptionsModule
1594 void init(CommandLineModuleSettings* /*settings*/) override {}
1596 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1598 void optionsFinished() override;
1616 bool bAllowMissing_;
1620 bool bChargeGroups_;
1630 bool bVsiteAromatics_;
1634 real long_bond_dist_;
1635 real short_bond_dist_;
1637 std::string indexOutputFile_;
1638 std::string outputFile_;
1639 std::string topologyFile_;
1640 std::string includeTopologyFile_;
1641 std::string outputConfFile_;
1642 std::string inputConfFile_;
1643 std::string outFile_;
1646 ChainSeparationType chainSeparation_;
1647 VSitesType vsitesType_;
1648 WaterType waterType_;
1649 MergeType mergeType_;
1652 char forcefield_[STRLEN];
1653 char ffdir_[STRLEN];
1656 std::vector<std::string> incls_;
1657 std::vector<t_mols> mols_;
1659 std::unique_ptr<LoggerOwner> loggerOwner_;
1662 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1664 const char* desc[] = {
1665 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1666 "some database files, adds hydrogens to the molecules and generates",
1667 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1668 "and a topology in GROMACS format.",
1669 "These files can subsequently be processed to generate a run input file.",
1671 "[THISMODULE] will search for force fields by looking for",
1672 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1673 "of the current working directory and of the GROMACS library directory",
1674 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1676 "By default the forcefield selection is interactive,",
1677 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1678 "in the list on the command line instead. In that case [THISMODULE] just looks",
1679 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1681 "After choosing a force field, all files will be read only from",
1682 "the corresponding force field directory.",
1683 "If you want to modify or add a residue types, you can copy the force",
1684 "field directory from the GROMACS library directory to your current",
1685 "working directory. If you want to add new protein residue types,",
1686 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1687 "or copy the whole library directory to a local directory and set",
1688 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1689 "Check Chapter 5 of the manual for more information about file formats.",
1692 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1693 "need not necessarily contain a protein structure. Every kind of",
1694 "molecule for which there is support in the database can be converted.",
1695 "If there is no support in the database, you can add it yourself.[PAR]",
1697 "The program has limited intelligence, it reads a number of database",
1698 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1699 "if necessary this can be done manually. The program can prompt the",
1700 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1701 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1702 "protonated (three protons, default), for Asp and Glu unprotonated",
1703 "(default) or protonated, for His the proton can be either on ND1,",
1704 "on NE2 or on both. By default these selections are done automatically.",
1705 "For His, this is based on an optimal hydrogen bonding",
1706 "conformation. Hydrogen bonds are defined based on a simple geometric",
1707 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1708 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1709 "[TT]-dist[tt] respectively.[PAR]",
1711 "The protonation state of N- and C-termini can be chosen interactively",
1712 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1713 "respectively. Some force fields support zwitterionic forms for chains of",
1714 "one residue, but for polypeptides these options should NOT be selected.",
1715 "The AMBER force fields have unique forms for the terminal residues,",
1716 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1717 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1718 "respectively to use these forms, making sure you preserve the format",
1719 "of the coordinate file. Alternatively, use named terminating residues",
1720 "(e.g. ACE, NME).[PAR]",
1722 "The separation of chains is not entirely trivial since the markup",
1723 "in user-generated PDB files frequently varies and sometimes it",
1724 "is desirable to merge entries across a TER record, for instance",
1725 "if you want a disulfide bridge or distance restraints between",
1726 "two protein chains or if you have a HEME group bound to a protein.",
1727 "In such cases multiple chains should be contained in a single",
1728 "[TT]moleculetype[tt] definition.",
1729 "To handle this, [THISMODULE] uses two separate options.",
1730 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1731 "start, and termini added when applicable. This can be done based on the",
1732 "existence of TER records, when the chain id changes, or combinations of either",
1733 "or both of these. You can also do the selection fully interactively.",
1734 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1735 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1736 "This can be turned off (no merging), all non-water chains can be merged into a",
1737 "single molecule, or the selection can be done interactively.[PAR]",
1739 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1740 "If any of the occupancies are not one, indicating that the atom is",
1741 "not resolved well in the structure, a warning message is issued.",
1742 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1743 "all occupancy fields may be zero. Either way, it is up to the user",
1744 "to verify the correctness of the input data (read the article!).[PAR]",
1746 "During processing the atoms will be reordered according to GROMACS",
1747 "conventions. With [TT]-n[tt] an index file can be generated that",
1748 "contains one group reordered in the same way. This allows you to",
1749 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1750 "one limitation: reordering is done after the hydrogens are stripped",
1751 "from the input and before new hydrogens are added. This means that",
1752 "you should not use [TT]-ignh[tt].[PAR]",
1754 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1755 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1756 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1759 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1760 "motions. Angular and out-of-plane motions can be removed by changing",
1761 "hydrogens into virtual sites and fixing angles, which fixes their",
1762 "position relative to neighboring atoms. Additionally, all atoms in the",
1763 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1764 "can be converted into virtual sites, eliminating the fast improper dihedral",
1765 "fluctuations in these rings (but this feature is deprecated).",
1766 "[BB]Note[bb] that in this case all other hydrogen",
1767 "atoms are also converted to virtual sites. The mass of all atoms that are",
1768 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1769 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1770 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1771 "done for water hydrogens to slow down the rotational motion of water.",
1772 "The increase in mass of the hydrogens is subtracted from the bonded",
1773 "(heavy) atom so that the total mass of the system remains the same.",
1775 "As a special case, ring-closed (or cyclic) molecules are considered.",
1776 "[THISMODULE] automatically determines if a cyclic molecule is present",
1777 "by evaluating the distance between the terminal atoms of a given chain.",
1778 "If this distance is greater than the [TT]-sb[tt]",
1779 "(\"Short bond warning distance\", default 0.05 nm)",
1780 "and less than the [TT]-lb[tt] (\"Long bond warning distance\", default 0.25 nm)",
1781 "the molecule is considered to be ring closed and will be processed as such.",
1782 "Please note that this does not detect cyclic bonds over periodic boundaries."
1785 settings->setHelpText(desc);
1787 options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1788 "Write the residue database in new format to [TT]new.rtp[tt]"));
1790 RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1792 RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1793 options->addOption(EnumOption<ChainSeparationType>("chainsep")
1794 .enumValue(c_chainSeparationTypeNames)
1795 .store(&chainSeparation_)
1796 .description("Condition in PDB files when a new chain should be "
1797 "started (adding termini)"));
1798 options->addOption(EnumOption<MergeType>("merge")
1799 .enumValue(c_mergeTypeNames)
1801 .description("Merge multiple chains into a single [moleculetype]"));
1802 options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1803 "Force field, interactive by default. Use [TT]-h[tt] for information."));
1805 EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1806 options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1807 "Set the next 8 options to interactive"));
1808 options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1809 "Interactive SS bridge selection"));
1810 options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1811 "Interactive termini selection, instead of charged (default)"));
1812 options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1813 "Interactive lysine selection, instead of charged"));
1814 options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1815 "Interactive arginine selection, instead of charged"));
1816 options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1817 "Interactive aspartic acid selection, instead of charged"));
1818 options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1819 "Interactive glutamic acid selection, instead of charged"));
1820 options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1821 "Interactive glutamine selection, instead of charged"));
1822 options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1823 "Interactive histidine selection, instead of checking H-bonds"));
1824 options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1825 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1827 RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1828 options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1829 "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1831 options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1832 "Sort the residues according to database, turning this off is dangerous as charge "
1833 "groups might be broken in parts"));
1835 BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1837 BooleanOption("missing")
1838 .store(&bAllowMissing_)
1839 .defaultValue(false)
1841 "Continue when atoms are missing and bonds cannot be made, dangerous"));
1843 BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1845 RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1846 options->addOption(EnumOption<VSitesType>("vsite")
1847 .store(&vsitesType_)
1848 .enumValue(c_vsitesTypeNames)
1849 .description("Convert atoms to virtual sites"));
1850 options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1851 "Make hydrogen atoms heavy"));
1853 BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1854 options->addOption(BooleanOption("chargegrp")
1855 .store(&bChargeGroups_)
1857 .description("Use charge groups in the [REF].rtp[ref] file"));
1858 options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1859 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1860 options->addOption(BooleanOption("renum")
1862 .defaultValue(false)
1863 .description("Renumber the residues consecutively in the output"));
1864 options->addOption(BooleanOption("rtpres")
1865 .store(&bRTPresname_)
1866 .defaultValue(false)
1867 .description("Use [REF].rtp[ref] entry names as residue names"));
1868 options->addOption(FileNameOption("f")
1871 .store(&inputConfFile_)
1873 .defaultBasename("protein")
1875 .description("Structure file"));
1876 options->addOption(FileNameOption("o")
1879 .store(&outputConfFile_)
1881 .defaultBasename("conf")
1882 .description("Structure file"));
1883 options->addOption(FileNameOption("p")
1886 .store(&topologyFile_)
1888 .defaultBasename("topol")
1889 .description("Topology file"));
1890 options->addOption(FileNameOption("i")
1893 .store(&includeTopologyFile_)
1895 .defaultBasename("posre")
1896 .description("Include file for topology"));
1897 options->addOption(FileNameOption("n")
1900 .store(&indexOutputFile_)
1901 .storeIsSet(&bIndexSet_)
1902 .defaultBasename("index")
1903 .description("Index file"));
1904 options->addOption(FileNameOption("q")
1908 .storeIsSet(&bOutputSet_)
1909 .defaultBasename("clean")
1911 .description("Structure file"));
1914 void pdb2gmx::optionsFinished()
1916 if (inputConfFile_.empty())
1918 GMX_THROW(InconsistentInputError("You must supply an input file"));
1922 /* if anything changes here, also change description of -inter */
1937 else if (bDeuterate_)
1946 /* Force field selection, interactive or direct */
1947 choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(),
1949 sizeof(forcefield_),
1952 loggerOwner_->logger());
1954 if (strlen(forcefield_) > 0)
1956 ffname_ = forcefield_;
1957 ffname_[0] = std::toupper(ffname_[0]);
1961 gmx_fatal(FARGS, "Empty forcefield string");
1967 char select[STRLEN];
1968 std::vector<DisulfideBond> ssbonds;
1972 const char* prev_atomname;
1973 const char* this_atomname;
1974 const char* prev_resname;
1975 const char* this_resname;
1980 int prev_chainnumber;
1981 int this_chainnumber;
1982 int this_chainstart;
1983 int prev_chainstart;
1985 const gmx::MDLogger& logger = loggerOwner_->logger();
1987 GMX_LOG(logger.info)
1989 .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1991 choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
1993 switch (vsitesType_)
1995 case VSitesType::None:
1997 bVsiteAromatics_ = false;
1999 case VSitesType::Hydrogens:
2001 bVsiteAromatics_ = false;
2003 case VSitesType::Aromatics:
2005 bVsiteAromatics_ = true;
2007 case VSitesType::Count:
2008 gmx_fatal(FARGS, "Internal inconsistency: VSitesType is out of range.");
2011 /* Open the symbol table */
2013 open_symtab(&symtab);
2015 /* Residue type database */
2018 /* Read residue renaming database(s), if present */
2019 std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
2021 std::vector<RtpRename> rtprename;
2022 for (const auto& filename : rrn)
2024 GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
2025 FILE* fp = fflib_open(filename);
2026 read_rtprename(filename.c_str(), fp, &rtprename);
2030 /* Add all alternative names from the residue renaming database to the list
2031 of recognized amino/nucleic acids. */
2032 for (const auto& rename : rtprename)
2034 /* Only add names if the 'standard' gromacs/iupac base name was found */
2035 if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
2037 rt.addResidue(rename.main, *restype);
2038 rt.addResidue(rename.nter, *restype);
2039 rt.addResidue(rename.cter, *restype);
2040 rt.addResidue(rename.bter, *restype);
2047 if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
2051 else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
2061 char* title = nullptr;
2065 int natom = read_pdball(inputConfFile_.c_str(),
2082 std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
2083 GMX_THROW(InconsistentInputError(message));
2086 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
2087 int nwaterchain = 0;
2089 modify_chain_numbers(&pdba_all, chainSeparation_, logger);
2091 int nchainmerges = 0;
2093 this_atomname = nullptr;
2095 this_resname = nullptr;
2098 this_chainnumber = -1;
2099 this_chainstart = 0;
2100 /* Keep the compiler happy */
2101 prev_chainstart = 0;
2104 std::vector<t_pdbchain> pdb_ch;
2107 bool bMerged = false;
2108 for (int i = 0; (i < natom); i++)
2110 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
2112 /* TODO this should live in a helper object, and consolidate
2113 that with code in modify_chain_numbers */
2114 prev_atomname = this_atomname;
2115 prev_atomnum = this_atomnum;
2116 prev_resname = this_resname;
2117 prev_resnum = this_resnum;
2118 prev_chainid = this_chainid;
2119 prev_chainnumber = this_chainnumber;
2122 prev_chainstart = this_chainstart;
2125 this_atomname = *pdba_all.atomname[i];
2126 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
2127 this_resname = *ri->name;
2128 this_resnum = ri->nr;
2129 this_chainid = ri->chainid;
2130 this_chainnumber = ri->chainnum;
2132 bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
2134 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
2138 "Must have pdbinfo from reading a PDB file if chain number is changing");
2139 this_chainstart = pdba_all.atom[i].resind;
2141 if (i > 0 && !bWat_)
2143 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
2145 GMX_LOG(logger.info)
2147 .appendTextFormatted(
2148 "Merge chain ending with residue %s%d (chain id '%c', atom %d "
2149 "%s) and chain starting with "
2150 "residue %s%d (chain id '%c', atom %d %s) into a single "
2151 "moleculetype (keeping termini)? [n/y]",
2163 if (nullptr == fgets(select, STRLEN - 1, stdin))
2165 gmx_fatal(FARGS, "Error reading from stdin");
2167 bMerged = (select[0] == 'y');
2169 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
2177 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
2178 pdba_all.atom[i].resind - prev_chainstart;
2179 pdb_ch[numChains - 1].nterpairs++;
2180 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
2185 /* set natom for previous chain */
2188 pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
2195 /* check if chain identifier was used before */
2196 for (int j = 0; (j < numChains); j++)
2198 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
2200 GMX_LOG(logger.warning)
2202 .appendTextFormatted(
2203 "Chain identifier '%c' is used in two non-sequential "
2205 "They will be treated as separate chains unless you "
2206 "reorder your file.",
2210 t_pdbchain newChain;
2211 newChain.chainid = ri->chainid;
2212 newChain.chainnum = ri->chainnum;
2214 newChain.bAllWat = bWat_;
2217 newChain.nterpairs = 0;
2221 newChain.nterpairs = 1;
2223 newChain.chainstart.resize(newChain.nterpairs + 1);
2224 /* modified [numChains] to [0] below */
2225 newChain.chainstart[0] = 0;
2226 pdb_ch.push_back(newChain);
2232 pdb_ch.back().natom = natom - pdb_ch.back().start;
2234 /* set all the water blocks at the end of the chain */
2235 std::vector<int> swap_index(numChains);
2237 for (int i = 0; i < numChains; i++)
2239 if (!pdb_ch[i].bAllWat)
2245 for (int i = 0; i < numChains; i++)
2247 if (pdb_ch[i].bAllWat)
2253 if (nwaterchain > 1)
2255 GMX_LOG(logger.info)
2257 .appendTextFormatted("Moved all the water blocks to the end");
2261 std::vector<t_chain> chains(numChains);
2262 /* copy pdb data and x for all chains */
2263 for (int i = 0; (i < numChains); i++)
2265 int si = swap_index[i];
2266 chains[i].chainid = pdb_ch[si].chainid;
2267 chains[i].chainnum = pdb_ch[si].chainnum;
2268 chains[i].bAllWat = pdb_ch[si].bAllWat;
2269 chains[i].nterpairs = pdb_ch[si].nterpairs;
2270 chains[i].chainstart = pdb_ch[si].chainstart;
2271 chains[i].ntdb.clear();
2272 chains[i].ctdb.clear();
2273 chains[i].r_start.resize(pdb_ch[si].nterpairs);
2274 chains[i].r_end.resize(pdb_ch[si].nterpairs);
2276 snew(chains[i].pdba, 1);
2277 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2278 for (j = 0; j < chains[i].pdba->nr; j++)
2280 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start + j];
2281 chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2282 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2283 chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2285 /* Re-index the residues assuming that the indices are continuous */
2286 int k = chains[i].pdba->atom[0].resind;
2287 int nres = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2288 chains[i].pdba->nres = nres;
2289 for (int j = 0; j < chains[i].pdba->nr; j++)
2291 chains[i].pdba->atom[j].resind -= k;
2293 srenew(chains[i].pdba->resinfo, nres);
2294 for (int j = 0; j < nres; j++)
2296 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k + j];
2297 chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2298 /* make all chain identifiers equal to that of the chain */
2299 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2303 if (nchainmerges > 0)
2305 GMX_LOG(logger.info)
2307 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2311 GMX_LOG(logger.info)
2313 .appendTextFormatted(
2314 "There are %d chains and %d blocks of water and "
2315 "%d residues with %d atoms",
2316 numChains - nwaterchain,
2321 GMX_LOG(logger.info)
2323 .appendTextFormatted(" %5s %4s %6s", "chain", "#res", "#atoms");
2324 for (int i = 0; (i < numChains); i++)
2326 GMX_LOG(logger.info)
2328 .appendTextFormatted(" %d '%c' %5d %6d %s\n",
2330 chains[i].chainid ? chains[i].chainid : '-',
2331 chains[i].pdba->nres,
2333 chains[i].bAllWat ? "(only water)" : "");
2336 check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2338 /* Read atomtypes... */
2339 PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2341 /* read residue database */
2342 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2343 std::vector<std::string> rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2344 std::vector<PreprocessResidue> rtpFFDB;
2345 for (const auto& filename : rtpf)
2347 readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2351 /* Not correct with multiple rtp input files with different bonded types */
2352 FILE* fp = gmx_fio_fopen("new.rtp", "w");
2353 print_resall(fp, rtpFFDB, atype);
2357 /* read hydrogen database */
2358 std::vector<MoleculePatchDatabase> ah;
2359 read_h_db(ffdir_, &ah);
2361 /* Read Termini database... */
2362 std::vector<MoleculePatchDatabase> ntdb;
2363 std::vector<MoleculePatchDatabase> ctdb;
2364 std::vector<MoleculePatchDatabase*> tdblist;
2365 int nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2366 int nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2368 FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2370 print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2373 std::vector<gmx::RVec> x;
2374 /* new pdb datastructure for sorting. */
2375 t_atoms** sortAtoms = nullptr;
2376 t_atoms** localAtoms = nullptr;
2377 snew(sortAtoms, numChains);
2378 snew(localAtoms, numChains);
2379 for (int chain = 0; (chain < numChains); chain++)
2381 cc = &(chains[chain]);
2383 /* set pdba, natom and nres to the current chain */
2386 natom = cc->pdba->nr;
2387 int nres = cc->pdba->nres;
2389 if (cc->chainid && (cc->chainid != ' '))
2391 GMX_LOG(logger.info)
2393 .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2401 GMX_LOG(logger.info)
2403 .appendTextFormatted(
2404 "Processing chain %d (%d atoms, %d residues)", chain + 1, natom, nres);
2407 process_chain(logger,
2424 cc->chainstart[cc->nterpairs] = pdba->nres;
2426 for (int i = 0; i < cc->nterpairs; i++)
2429 pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]), &(cc->r_end[j]), &rt, logger);
2430 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2432 if (checkChainCyclicity(
2433 pdba, pdbx, cc->r_start[j], cc->r_end[j], rtpFFDB, rtprename, long_bond_dist_, short_bond_dist_))
2435 cc->cyclicBondsIndex.push_back(cc->r_start[j]);
2436 cc->cyclicBondsIndex.push_back(cc->r_end[j]);
2437 cc->r_start[j] = -1;
2447 if (cc->nterpairs == 0 && cc->cyclicBondsIndex.empty())
2449 GMX_LOG(logger.info)
2451 .appendTextFormatted(
2452 "Problem with chain definition, or missing terminal residues. "
2453 "This chain does not appear to contain a recognized chain molecule. "
2454 "If this is incorrect, you can edit residuetypes.dat to modify the "
2458 /* Check for disulfides and other special bonds */
2459 ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2461 if (!rtprename.empty())
2463 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2466 for (int i = 0; i < cc->nterpairs; i++)
2469 * We first apply a filter so we only have the
2470 * termini that can be applied to the residue in question
2471 * (or a generic terminus if no-residue specific is available).
2473 /* First the N terminus */
2476 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2477 if (tdblist.empty())
2479 GMX_LOG(logger.info)
2481 .appendTextFormatted(
2482 "No suitable end (N or 5') terminus found in database - "
2483 "assuming this residue "
2484 "is already in a terminus-specific form and skipping terminus "
2486 cc->ntdb.push_back(nullptr);
2490 if (bTerMan_ && !tdblist.empty())
2493 "Select start terminus type for %s-%d",
2494 *pdba->resinfo[cc->r_start[i]].name,
2495 pdba->resinfo[cc->r_start[i]].nr);
2496 cc->ntdb.push_back(choose_ter(tdblist, select));
2500 cc->ntdb.push_back(tdblist[0]);
2503 printf("Start terminus %s-%d: %s\n",
2504 *pdba->resinfo[cc->r_start[i]].name,
2505 pdba->resinfo[cc->r_start[i]].nr,
2506 (cc->ntdb[i])->name.c_str());
2512 cc->ntdb.push_back(nullptr);
2515 /* And the C terminus */
2518 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2519 if (tdblist.empty())
2521 GMX_LOG(logger.info)
2523 .appendTextFormatted(
2524 "No suitable end (C or 3') terminus found in database - "
2525 "assuming this residue"
2526 "is already in a terminus-specific form and skipping terminus "
2528 cc->ctdb.push_back(nullptr);
2532 if (bTerMan_ && !tdblist.empty())
2535 "Select end terminus type for %s-%d",
2536 *pdba->resinfo[cc->r_end[i]].name,
2537 pdba->resinfo[cc->r_end[i]].nr);
2538 cc->ctdb.push_back(choose_ter(tdblist, select));
2542 cc->ctdb.push_back(tdblist[0]);
2544 printf("End terminus %s-%d: %s\n",
2545 *pdba->resinfo[cc->r_end[i]].name,
2546 pdba->resinfo[cc->r_end[i]].nr,
2547 (cc->ctdb[i])->name.c_str());
2553 cc->ctdb.push_back(nullptr);
2556 std::vector<MoleculePatchDatabase> hb_chain;
2557 /* lookup hackblocks and rtp for all residues */
2558 std::vector<PreprocessResidue> restp_chain;
2559 get_hackblocks_rtp(&hb_chain,
2572 /* ideally, now we would not need the rtp itself anymore, but do
2573 everything using the hb and restp arrays. Unfortunately, that
2574 requires some re-thinking of code in gen_vsite.c, which I won't
2575 do now :( AF 26-7-99 */
2577 rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2579 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2584 t_blocka* block = new_blocka();
2586 sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2587 remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2592 GMX_LOG(logger.warning)
2594 .appendTextFormatted(
2595 "With the -remh option the generated "
2596 "index file (%s) might be useless "
2597 "(the index file is generated before hydrogens are added)",
2598 indexOutputFile_.c_str());
2600 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2602 for (int i = 0; i < block->nr; i++)
2612 GMX_LOG(logger.warning)
2614 .appendTextFormatted(
2615 "Without sorting no check for duplicate atoms can be done");
2618 /* Generate Hydrogen atoms (and termini) in the sequence */
2619 GMX_LOG(logger.info)
2621 .appendTextFormatted(
2622 "Generating any missing hydrogen atoms and/or adding termini.");
2634 cc->cyclicBondsIndex);
2635 GMX_LOG(logger.info)
2637 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2639 /* make up molecule name(s) */
2641 int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2643 std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2645 std::string molname;
2653 this_chainid = cc->chainid;
2655 /* Add the chain id if we have one */
2656 if (this_chainid != ' ')
2658 suffix.append(formatString("_chain_%c", this_chainid));
2661 /* Check if there have been previous chains with the same id */
2663 for (int k = 0; k < chain; k++)
2665 if (cc->chainid == chains[k].chainid)
2670 /* Add the number for this chain identifier if there are multiple copies */
2673 suffix.append(formatString("%d", nid_used + 1));
2676 if (suffix.length() > 0)
2678 molname.append(restype);
2679 molname.append(suffix);
2686 std::string itp_fn = topologyFile_;
2688 std::string posre_fn = includeTopologyFile_;
2689 if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2692 printf("Chain time...\n");
2693 // construct the itp file name
2694 itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2696 itp_fn.append(molname);
2697 itp_fn.append(".itp");
2698 // now do the same for posre
2699 posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2700 posre_fn.append("_");
2701 posre_fn.append(molname);
2702 posre_fn.append(".itp");
2703 if (posre_fn == itp_fn)
2705 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2707 incls_.emplace_back();
2708 incls_.back() = itp_fn;
2709 itp_file_ = gmx_fio_fopen(itp_fn.c_str(), "w");
2716 mols_.emplace_back();
2719 mols_.back().name = "SOL";
2720 mols_.back().nr = pdba->nres;
2724 mols_.back().name = molname;
2725 mols_.back().nr = 1;
2730 print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2736 top_file2 = nullptr;
2740 top_file2 = itp_file_;
2744 top_file2 = top_file;
2770 cc->cyclicBondsIndex,
2775 write_posres(posre_fn.c_str(), pdba, posre_fc_);
2780 gmx_fio_fclose(itp_file_);
2783 /* pdba and natom have been reassigned somewhere so: */
2788 if (watermodel_ == nullptr)
2790 for (int chain = 0; chain < numChains; chain++)
2792 if (chains[chain].bAllWat)
2794 auto message = formatString(
2795 "You have chosen not to include a water model, "
2796 "but there is water in the input file. Select a "
2797 "water model or remove the water from your input file.");
2798 GMX_THROW(InconsistentInputError(message));
2804 std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2805 if (!fflib_fexist(waterFile))
2807 auto message = formatString(
2808 "The topology file '%s' for the selected water "
2809 "model '%s' can not be found in the force field "
2810 "directory. Select a different water model.",
2813 GMX_THROW(InconsistentInputError(message));
2817 print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2818 gmx_fio_fclose(top_file);
2820 /* now merge all chains back together */
2823 for (int i = 0; (i < numChains); i++)
2825 natom += chains[i].pdba->nr;
2826 nres += chains[i].pdba->nres;
2830 init_t_atoms(atoms, natom, false);
2831 for (int i = 0; i < atoms->nres; i++)
2833 sfree(atoms->resinfo[i].name);
2836 srenew(atoms->resinfo, nres);
2840 for (int i = 0; (i < numChains); i++)
2844 GMX_LOG(logger.info)
2846 .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2849 chains[i].pdba->nres);
2851 for (int j = 0; (j < chains[i].pdba->nr); j++)
2853 atoms->atom[k] = chains[i].pdba->atom[j];
2854 atoms->atom[k].resind += l; /* l is processed nr of residues */
2855 atoms->atomname[k] = chains[i].pdba->atomname[j];
2856 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2857 x.push_back(chains[i].x[j]);
2860 for (int j = 0; (j < chains[i].pdba->nres); j++)
2862 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2865 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2869 done_atom(chains[i].pdba);
2874 GMX_LOG(logger.info)
2876 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2877 print_sums(atoms, true, logger);
2881 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2882 clear_rvec(box_space);
2885 make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2888 outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr, pbcType, box);
2890 done_symtab(&symtab);
2891 done_atom(&pdba_all);
2893 for (int chain = 0; chain < numChains; chain++)
2895 sfree(sortAtoms[chain]);
2896 sfree(localAtoms[chain]);
2904 GMX_LOG(logger.info)
2906 .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2907 GMX_LOG(logger.info)
2909 .appendTextFormatted("You have successfully generated a topology from: %s.",
2910 inputConfFile_.c_str());
2911 if (watermodel_ != nullptr)
2913 GMX_LOG(logger.info)
2915 .appendTextFormatted(
2916 "The %s force field and the %s water model are used.", ffname_, watermodel_);
2921 GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2923 GMX_LOG(logger.info)
2925 .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2932 const char pdb2gmxInfo::name[] = "pdb2gmx";
2933 const char pdb2gmxInfo::shortDescription[] =
2934 "Convert coordinate files to topology and FF-compliant coordinate files";
2935 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2937 return std::make_unique<pdb2gmx>();