2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/commandline/cmdlineoptionsmodule.h"
53 #include "gromacs/fileio/confio.h"
54 #include "gromacs/fileio/filetypes.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/pdbio.h"
57 #include "gromacs/gmxlib/conformation_utilities.h"
58 #include "gromacs/gmxpreprocess/fflibutil.h"
59 #include "gromacs/gmxpreprocess/genhydro.h"
60 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
61 #include "gromacs/gmxpreprocess/grompp_impl.h"
62 #include "gromacs/gmxpreprocess/h_db.h"
63 #include "gromacs/gmxpreprocess/hizzie.h"
64 #include "gromacs/gmxpreprocess/pdb2top.h"
65 #include "gromacs/gmxpreprocess/pgutil.h"
66 #include "gromacs/gmxpreprocess/specbond.h"
67 #include "gromacs/gmxpreprocess/ter_db.h"
68 #include "gromacs/gmxpreprocess/toputil.h"
69 #include "gromacs/gmxpreprocess/xlate.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/options/basicoptions.h"
72 #include "gromacs/options/filenameoption.h"
73 #include "gromacs/options/ioptionscontainer.h"
74 #include "gromacs/topology/atomprop.h"
75 #include "gromacs/topology/block.h"
76 #include "gromacs/topology/index.h"
77 #include "gromacs/topology/residuetypes.h"
78 #include "gromacs/topology/symtab.h"
79 #include "gromacs/topology/topology.h"
80 #include "gromacs/utility/dir_separator.h"
81 #include "gromacs/utility/exceptions.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/filestream.h"
84 #include "gromacs/utility/loggerbuilder.h"
85 #include "gromacs/utility/path.h"
86 #include "gromacs/utility/smalloc.h"
87 #include "gromacs/utility/strdb.h"
88 #include "gromacs/utility/stringutil.h"
90 #include "hackblock.h"
95 RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
113 const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
115 /* NOTE: This function returns the main building block name,
116 * it does not take terminal renaming into account.
118 auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
119 return gmx::equalCaseInsensitive(name, rename.gmx);
121 return found != rr.end() ? found->main.c_str() : name.c_str();
124 const char* select_res(int nr,
129 gmx::ArrayRef<const RtpRename> rr)
131 printf("Which %s type do you want for residue %d\n", title, resnr + 1);
132 for (int sel = 0; (sel < nr); sel++)
134 printf("%d. %s (%s)\n", sel, expl[sel], res2bb_notermini(name[sel], rr));
136 printf("\nType a number:");
140 if (scanf("%d", &userSelection) != 1)
142 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
145 return name[userSelection];
148 const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
156 const char* lh[easpNR] = { "ASP", "ASPH" };
157 const char* expl[easpNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
159 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", rr);
162 const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
170 const char* lh[egluNR] = { "GLU", "GLUH" };
171 const char* expl[egluNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
173 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", rr);
176 const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
184 const char* lh[eglnNR] = { "GLN", "QLN" };
185 const char* expl[eglnNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
187 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", rr);
190 const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
198 const char* lh[elysNR] = { "LYSN", "LYS" };
199 const char* expl[elysNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
201 return select_res(elysNR, resnr, lh, expl, "LYSINE", rr);
204 const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
212 const char* lh[eargNR] = { "ARGN", "ARG" };
213 const char* expl[eargNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
215 return select_res(eargNR, resnr, lh, expl, "ARGININE", rr);
218 const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
220 const char* expl[ehisNR] = {
221 "H on ND1 only", "H on NE2 only", "H on ND1 and NE2", "Coupled to Heme"
224 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", rr);
227 void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
229 char line[STRLEN], buf[STRLEN];
232 while (get_a_line(fp, line, STRLEN))
234 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
235 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
236 * Note that the buffer length has been increased to 7 to allow this,
237 * so we just need to make sure the strings have zero-length initially.
249 int nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
250 RtpRename newEntry(gmx, main, nter, cter, bter);
253 if (nc != 2 && nc != 5)
256 "Residue renaming database '%s' has %d columns instead of %d or %d",
267 "A line in residue renaming database '%s' has %d columns, while previous "
268 "lines have %d columns",
276 /* This file does not have special termini names, copy them from main */
277 newEntry.nter = newEntry.main;
278 newEntry.cter = newEntry.main;
279 newEntry.bter = newEntry.main;
281 rtprename->push_back(newEntry);
285 std::string search_resrename(gmx::ArrayRef<const RtpRename> rr, const char* name, bool bStart, bool bEnd, bool bCompareFFRTPname)
287 auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
288 return ((!bCompareFFRTPname && (name == rename.gmx))
289 || (bCompareFFRTPname && (name == rename.main)));
293 /* If found in the database, rename this residue's rtp building block,
294 * otherwise keep the old name.
296 if (found != rr.end())
300 newName = found->bter;
304 newName = found->nter;
308 newName = found->cter;
312 newName = found->main;
315 if (newName[0] == '-')
318 "In the chosen force field there is no residue type for '%s'%s",
320 bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
321 : (bEnd ? " as an ending terminus" : ""));
328 void rename_resrtp(t_atoms* pdba,
330 gmx::ArrayRef<const int> r_start,
331 gmx::ArrayRef<const int> r_end,
332 gmx::ArrayRef<const RtpRename> rr,
335 const gmx::MDLogger& logger)
337 bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
339 for (int r = 0; r < pdba->nres; r++)
343 for (int j = 0; j < nterpairs; j++)
350 for (int j = 0; j < nterpairs; j++)
358 std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
360 if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
362 /* This is a terminal residue, but the residue name,
363 * currently stored in .rtp, is not a standard residue name,
364 * but probably a force field specific rtp name.
365 * Check if we need to rename it because it is terminal.
367 newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
370 if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
376 .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
378 *pdba->resinfo[r].name,
381 pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
386 void pdbres_to_gmxrtp(t_atoms* pdba)
390 for (i = 0; (i < pdba->nres); i++)
392 if (pdba->resinfo[i].rtp == nullptr)
394 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
399 void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
404 for (i = 0; (i < pdba->nres); i++)
406 resnm = *pdba->resinfo[i].name;
407 if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
408 || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
410 /* Rename the residue name (not the rtp name) */
411 pdba->resinfo[i].name = put_symtab(symtab, newnm);
416 /*! \brief Rename all residues named \c oldnm to \c newnm
418 * Search for residues for which the residue name from the input
419 * configuration file matches \c oldnm, and when found choose the rtp
420 * entry and name of \c newnm.
422 * \todo Refactor this function to accept a lambda that accepts i and
423 * numMatchesFound but always produces \c newnm. Then remove
424 * renameResiduesInteractively by calling this method with suitable
425 * lambdas that capture its parameter \c rr and ignores
426 * numMatchesFound. */
427 void renameResidue(const gmx::MDLogger& logger,
434 int numMatchesFound = 0;
435 for (int i = 0; (i < pdba->nres); i++)
437 /* We have not set the rtp name yet, use the residue name */
438 const char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
439 if ((bFullCompare && (gmx::equalCaseInsensitive(residueNameInInputConfiguration, oldnm)))
440 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
442 /* Change the rtp building block name */
443 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
444 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
448 if (numMatchesFound > 0)
452 .appendTextFormatted(
453 "Replaced %d residue%s named %s to the default %s. Use interactive "
454 "selection of protonated residues if that is what you need.",
456 numMatchesFound > 1 ? "s" : "",
462 /*! \brief Rename all residues named \c oldnm according to the user's
465 * Search for residues for which the residue name from the input
466 * configuration file matches \c oldnm, and when found choose the rtp
467 * entry and name of the interactive choice from \c gettp.
469 * \todo Remove this function, per todo in \c renameResidue. */
470 void renameResidueInteractively(t_atoms* pdba,
472 const char* gettp(int, gmx::ArrayRef<const RtpRename>),
475 gmx::ArrayRef<const RtpRename> rr)
477 // Search for residues i for which the residue name from the input
478 // configuration file matches oldnm, so it can replaced by the rtp
479 // entry and name of newnm.
480 for (int i = 0; i < pdba->nres; i++)
482 /* We have not set the rtp name yet, use the residue name */
483 char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
484 if ((bFullCompare && (strcmp(residueNameInInputConfiguration, oldnm) == 0))
485 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
487 const char* interactiveRtpChoice = gettp(i, rr);
488 pdba->resinfo[i].rtp = put_symtab(symtab, interactiveRtpChoice);
489 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
494 void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
500 ftp = fn2ftp(filename);
501 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
503 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
507 for (i = 0; (i < atoms->nr); i++)
509 if (atoms->pdbinfo[i].occup != 1)
513 GMX_LOG(logger.warning)
515 .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
516 *atoms->resinfo[atoms->atom[i].resind].name,
517 atoms->resinfo[atoms->atom[i].resind].nr,
519 atoms->pdbinfo[i].occup);
521 if (atoms->pdbinfo[i].occup == 0)
531 if (nzero == atoms->nr)
533 GMX_LOG(logger.warning)
535 .appendTextFormatted(
536 "All occupancy fields zero. This is probably not an X-Ray structure");
538 else if ((nzero > 0) || (nnotone > 0))
540 GMX_LOG(logger.warning)
542 .appendTextFormatted(
543 "there were %d atoms with zero occupancy and %d atoms with "
544 " occupancy unequal to one (out of %d atoms). Check your pdb "
552 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
557 void write_posres(const char* fn, t_atoms* pdba, real fc)
562 fp = gmx_fio_fopen(fn, "w");
564 "; In this topology include file, you will find position restraint\n"
565 "; entries for all the heavy atoms in your original pdb file.\n"
566 "; This means that all the protons which were added by pdb2gmx are\n"
567 "; not restrained.\n"
569 "[ position_restraints ]\n"
570 "; %4s%6s%8s%8s%8s\n",
576 for (i = 0; (i < pdba->nr); i++)
578 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
580 fprintf(fp, "%6d%6d %g %g %g\n", i + 1, 1, fc, fc, fc);
586 int read_pdball(const char* inf,
600 /* Read a pdb file. (containing proteins) */
602 int natom, new_natom, i;
605 printf("Reading %s...\n", inf);
606 readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
608 if (atoms->pdbinfo == nullptr)
610 snew(atoms->pdbinfo, atoms->nr);
612 if (fn2ftp(inf) == efPDB)
614 get_pdb_atomnumber(atoms, aps);
619 for (i = 0; i < atoms->nr; i++)
621 if (!is_hydrogen(*atoms->atomname[i]))
623 atoms->atom[new_natom] = atoms->atom[i];
624 atoms->atomname[new_natom] = atoms->atomname[i];
625 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
626 copy_rvec((*x)[i], (*x)[new_natom]);
630 atoms->nr = new_natom;
637 printf(" '%s',", *title);
639 printf(" %d atoms\n", natom);
641 /* Rename residues */
642 rename_pdbres(atoms, "HOH", watres, false, symtab);
643 rename_pdbres(atoms, "SOL", watres, false, symtab);
644 rename_pdbres(atoms, "WAT", watres, false, symtab);
646 rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose);
654 write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
660 void process_chain(const gmx::MDLogger& logger,
662 gmx::ArrayRef<gmx::RVec> x,
675 gmx::ArrayRef<const RtpRename> rr)
677 /* Rename aromatics, lys, asp and histidine */
680 renameResidue(logger, pdba, "TYR", "TYRU", false, symtab);
684 renameResidue(logger, pdba, "TRP", "TRPU", false, symtab);
688 renameResidue(logger, pdba, "PHE", "PHEU", false, symtab);
692 renameResidueInteractively(pdba, "LYS", get_lystp, false, symtab, rr);
696 renameResidueInteractively(pdba, "ARG", get_argtp, false, symtab, rr);
700 renameResidueInteractively(pdba, "GLN", get_glntp, false, symtab, rr);
704 renameResidueInteractively(pdba, "ASP", get_asptp, false, symtab, rr);
708 renameResidue(logger, pdba, "ASPH", "ASP", false, symtab);
712 renameResidueInteractively(pdba, "GLU", get_glutp, false, symtab, rr);
716 renameResidue(logger, pdba, "GLUH", "GLU", false, symtab);
721 set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
725 renameResidueInteractively(pdba, "HIS", get_histp, true, symtab, rr);
728 /* Initialize the rtp builing block names with the residue names
729 * for the residues that have not been processed above.
731 pdbres_to_gmxrtp(pdba);
733 /* Now we have all rtp names set.
734 * The rtp names will conform to Gromacs naming,
735 * unless the input pdb file contained one or more force field specific
736 * rtp names as residue names.
740 /* struct for sorting the atoms from the pdb file */
743 int resnr; /* residue number */
744 int j; /* database order index */
745 int index; /* original atom number */
746 char anm1; /* second letter of atom name */
747 char altloc; /* alternate location indicator */
750 bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
752 int d = (a.resnr - b.resnr);
758 d = (a.anm1 - b.anm1);
761 d = (a.altloc - b.altloc);
768 void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
771 t_atoms** newPdbAtoms,
772 std::vector<gmx::RVec>* x,
776 t_atoms* pdba = *pdbaptr;
777 std::vector<gmx::RVec> xnew;
784 for (int i = 0; i < natoms; i++)
786 atomnm = *pdba->atomname[i];
787 const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
789 std::find_if(localPpResidue->atomname.begin(),
790 localPpResidue->atomname.end(),
791 [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
792 if (found == localPpResidue->atomname.end())
794 std::string buf = gmx::formatString(
795 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
796 "while sorting atoms.\n%s",
798 *pdba->resinfo[pdba->atom[i].resind].name,
799 pdba->resinfo[pdba->atom[i].resind].nr,
800 localPpResidue->resname.c_str(),
801 localPpResidue->natom(),
803 ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
804 "might have had a different number in the PDB file and was rebuilt\n"
805 "(it might for instance have been H3, and we only expected H1 & "
807 "Note that hydrogens might have been added to the entry for the "
809 "Remove this hydrogen or choose a different protonation state to "
811 "Option -ignh will ignore all hydrogens in the input."
813 gmx_fatal(FARGS, "%s", buf.c_str());
815 /* make shadow array to be sorted into indexgroup */
816 pdbi[i].resnr = pdba->atom[i].resind;
817 pdbi[i].j = std::distance(localPpResidue->atomname.begin(), found);
819 pdbi[i].anm1 = atomnm[1];
820 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
822 std::sort(pdbi, pdbi + natoms, pdbicomp);
824 /* pdba is sorted in pdbnew using the pdbi index */
825 std::vector<int> a(natoms);
826 srenew(*newPdbAtoms, 1);
827 init_t_atoms((*newPdbAtoms), natoms, true);
828 (*newPdbAtoms)->nr = pdba->nr;
829 (*newPdbAtoms)->nres = pdba->nres;
830 srenew((*newPdbAtoms)->resinfo, pdba->nres);
831 std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
832 for (int i = 0; i < natoms; i++)
834 (*newPdbAtoms)->atom[i] = pdba->atom[pdbi[i].index];
835 (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
836 (*newPdbAtoms)->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
837 xnew.push_back(x->at(pdbi[i].index));
838 /* make indexgroup in block */
839 a[i] = pdbi[i].index;
844 /* copy the sorted pdbnew back to pdba */
845 *pdbaptr = *newPdbAtoms;
847 add_grp(block, gnames, a, "prot_sort");
851 int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
853 int i, j, oldnatoms, ndel;
856 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
857 oldnatoms = pdba->nr;
859 /* NOTE: pdba->nr is modified inside the loop */
860 for (i = 1; (i < pdba->nr); i++)
862 /* compare 'i' and 'i-1', throw away 'i' if they are identical
863 this is a 'while' because multiple alternate locations can be present */
864 while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
865 && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
870 ri = &pdba->resinfo[pdba->atom[i].resind];
873 .appendTextFormatted("deleting duplicate atom %4s %s%4d%c",
878 if (ri->chainid && (ri->chainid != ' '))
880 printf(" ch %c", ri->chainid);
884 if (pdba->pdbinfo[i].atomnr)
886 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
888 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
890 printf(" altloc %c", pdba->pdbinfo[i].altloc);
896 /* We can not free, since it might be in the symtab */
897 /* sfree(pdba->atomname[i]); */
898 for (j = i; j < pdba->nr; j++)
900 pdba->atom[j] = pdba->atom[j + 1];
901 pdba->atomname[j] = pdba->atomname[j + 1];
904 pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
906 copy_rvec(x[j + 1], x[j]);
908 srenew(pdba->atom, pdba->nr);
909 /* srenew(pdba->atomname, pdba->nr); */
910 srenew(pdba->pdbinfo, pdba->nr);
913 if (pdba->nr != oldnatoms)
917 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
923 void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
925 std::string startResidueString =
926 gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
927 std::string endResidueString =
928 gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
930 // Check whether all residues in chain have the same chain ID.
931 bool allResiduesHaveSameChainID = true;
932 char chainID0 = pdba->resinfo[r0].chainid;
934 std::string residueString;
936 for (int i = r0 + 1; i < r1; i++)
938 chainID = pdba->resinfo[i].chainid;
939 if (chainID != chainID0)
941 allResiduesHaveSameChainID = false;
942 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
947 if (!allResiduesHaveSameChainID)
950 "The chain covering the range %s--%s does not have a consistent chain ID. "
951 "The first residue has ID '%c', while residue %s has ID '%c'.",
952 startResidueString.c_str(),
953 endResidueString.c_str(),
955 residueString.c_str(),
959 // At this point all residues have the same ID. If they are also non-blank
960 // we can be a bit more aggressive and require the types match too.
963 bool allResiduesHaveSameType = true;
965 std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
967 for (int i = r0 + 1; i < r1; i++)
969 restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
970 if (!gmx::equalCaseInsensitive(restype, restype0))
972 allResiduesHaveSameType = false;
973 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
978 if (!allResiduesHaveSameType)
981 "The residues in the chain %s--%s do not have a consistent type. "
982 "The first residue has type '%s', while residue %s is of type '%s'. "
983 "Either there is a mistake in your chain, or it includes nonstandard "
984 "residue names that have not yet been added to the residuetypes.dat "
985 "file in the GROMACS library directory. If there are other molecules "
986 "such as ligands, they should not have the same chain ID as the "
987 "adjacent protein chain since it's a separate molecule.",
988 startResidueString.c_str(),
989 endResidueString.c_str(),
991 residueString.c_str(),
997 void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
1000 std::optional<std::string> startrestype;
1005 int startWarnings = 0;
1006 int endWarnings = 0;
1009 // Check that all residues have the same chain identifier, and if it is
1010 // non-blank we also require the residue types to match.
1011 checkResidueTypeSanity(pdba, r0, r1, rt);
1013 // If we return correctly from checkResidueTypeSanity(), the only
1014 // remaining cases where we can have non-matching residue types is if
1015 // the chain ID was blank, which could be the case e.g. for a structure
1016 // read from a GRO file or other file types without chain information.
1017 // In that case we need to be a bit more liberal and detect chains based
1018 // on the residue type.
1020 // If we get here, the chain ID must be identical for all residues
1021 char chainID = pdba->resinfo[r0].chainid;
1023 /* Find the starting terminus (typially N or 5') */
1024 for (i = r0; i < r1 && *r_start == -1; i++)
1026 startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1031 if (gmx::equalCaseInsensitive(*startrestype, "Protein")
1032 || gmx::equalCaseInsensitive(*startrestype, "DNA")
1033 || gmx::equalCaseInsensitive(*startrestype, "RNA"))
1035 GMX_LOG(logger.info)
1037 .appendTextFormatted("Identified residue %s%d as a starting terminus.",
1038 *pdba->resinfo[i].name,
1039 pdba->resinfo[i].nr);
1042 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1046 GMX_LOG(logger.info)
1048 .appendTextFormatted(
1049 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1051 *pdba->resinfo[i].name,
1052 pdba->resinfo[i].nr);
1056 GMX_LOG(logger.info)
1058 .appendTextFormatted("Disabling further notes about ions.");
1064 // Either no known residue type, or one not needing special handling
1065 if (startWarnings < 5)
1069 GMX_LOG(logger.warning)
1071 .appendTextFormatted(
1072 "Starting residue %s%d in chain not identified as "
1074 "This chain lacks identifiers, which makes it impossible to do "
1076 "classification of the start/end residues. Here we need to "
1077 "guess this residue "
1078 "should not be part of the chain and instead introduce a "
1079 "break, but that will "
1080 "be catastrophic if they should in fact be linked. Please "
1081 "check your structure, "
1082 "and add %s to residuetypes.dat if this was not correct.",
1083 *pdba->resinfo[i].name,
1084 pdba->resinfo[i].nr,
1085 *pdba->resinfo[i].name);
1089 GMX_LOG(logger.warning)
1091 .appendTextFormatted(
1092 "No residues in chain starting at %s%d identified as "
1094 "This makes it impossible to link them into a molecule, which "
1096 "correct or a catastrophic error. Please check your structure, "
1098 "necessary residue names to residuetypes.dat if this was not "
1100 *pdba->resinfo[i].name,
1101 pdba->resinfo[i].nr);
1104 if (startWarnings == 4)
1106 GMX_LOG(logger.warning)
1108 .appendTextFormatted(
1109 "Disabling further warnings about unidentified residues at start "
1118 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1119 for (int i = *r_start; i < r1; i++)
1121 std::optional<std::string> restype =
1122 rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1127 if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
1131 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1135 GMX_LOG(logger.info)
1137 .appendTextFormatted(
1138 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1140 *pdba->resinfo[i].name,
1141 pdba->resinfo[i].nr);
1145 GMX_LOG(logger.info)
1147 .appendTextFormatted("Disabling further notes about ions.");
1153 // Either no known residue type, or one not needing special handling.
1154 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1155 // Otherwise the call to checkResidueTypeSanity() will
1156 // have caught the problem.
1157 if (endWarnings < 5)
1159 GMX_LOG(logger.warning)
1161 .appendTextFormatted(
1162 "Residue %s%d in chain has different type ('%s') from "
1163 "residue %s%d ('%s'). This chain lacks identifiers, which "
1165 "it impossible to do strict classification of the start/end "
1166 "residues. Here we "
1167 "need to guess this residue should not be part of the chain "
1169 "introduce a break, but that will be catastrophic if they "
1170 "should in fact be "
1171 "linked. Please check your structure, and add %s to "
1173 "if this was not correct.",
1174 *pdba->resinfo[i].name,
1175 pdba->resinfo[i].nr,
1177 *pdba->resinfo[*r_start].name,
1178 pdba->resinfo[*r_start].nr,
1179 startrestype->c_str(),
1180 *pdba->resinfo[i].name);
1182 if (endWarnings == 4)
1184 GMX_LOG(logger.warning)
1186 .appendTextFormatted(
1187 "Disabling further warnings about unidentified residues at end "
1197 GMX_LOG(logger.info)
1199 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1200 *pdba->resinfo[*r_end].name,
1201 pdba->resinfo[*r_end].nr);
1205 enum class ChainSeparationType : int
1214 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1215 { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1217 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1218 { "Splitting chemical chains based on TER records or chain id changing.\n",
1219 "Splitting chemical chains based on TER records and chain id changing.\n",
1220 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1221 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1222 "Splitting chemical chains interactively.\n" }
1225 void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1228 char old_prev_chainid;
1229 char old_this_chainid;
1230 int old_prev_chainnum;
1231 int old_this_chainnum;
1233 char select[STRLEN];
1237 const char* prev_atomname;
1238 const char* this_atomname;
1239 const char* prev_resname;
1240 const char* this_resname;
1246 /* The default chain enumeration is based on TER records only */
1247 printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1249 old_prev_chainid = '?';
1250 old_prev_chainnum = -1;
1253 this_atomname = nullptr;
1255 this_resname = nullptr;
1259 for (i = 0; i < pdba->nres; i++)
1261 ri = &pdba->resinfo[i];
1262 old_this_chainid = ri->chainid;
1263 old_this_chainnum = ri->chainnum;
1265 prev_atomname = this_atomname;
1266 prev_atomnum = this_atomnum;
1267 prev_resname = this_resname;
1268 prev_resnum = this_resnum;
1269 prev_chainid = this_chainid;
1271 this_atomname = *(pdba->atomname[i]);
1272 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1273 this_resname = *ri->name;
1274 this_resnum = ri->nr;
1275 this_chainid = ri->chainid;
1277 switch (chainSeparation)
1279 case ChainSeparationType::IdOrTer:
1280 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1286 case ChainSeparationType::IdAndTer:
1287 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1293 case ChainSeparationType::Id:
1294 if (old_this_chainid != old_prev_chainid)
1300 case ChainSeparationType::Ter:
1301 if (old_this_chainnum != old_prev_chainnum)
1306 case ChainSeparationType::Interactive:
1307 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1311 GMX_LOG(logger.info)
1313 .appendTextFormatted(
1314 "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1316 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1328 if (nullptr == fgets(select, STRLEN - 1, stdin))
1330 gmx_fatal(FARGS, "Error reading from stdin");
1333 if (i == 0 || select[0] == 'y')
1339 case ChainSeparationType::Count:
1340 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1342 old_prev_chainid = old_this_chainid;
1343 old_prev_chainnum = old_this_chainnum;
1345 ri->chainnum = new_chainnum;
1349 bool checkChainCyclicity(t_atoms* pdba,
1353 gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1354 gmx::ArrayRef<const RtpRename> rr,
1355 real long_bond_dist_,
1356 real short_bond_dist_)
1358 int ai = -1, aj = -1;
1359 char* rtpname = *(pdba->resinfo[start_ter].rtp);
1360 std::string newName = search_resrename(rr, rtpname, false, false, false);
1361 if (newName.empty())
1365 auto res = getDatabaseEntry(newName, rtpFFDB);
1366 const char *name_ai, *name_aj;
1368 for (const auto& patch : res->rb[BondedTypes::Bonds].b)
1369 { /* Search backward bond for n/5' terminus */
1370 name_ai = patch.ai().c_str();
1371 name_aj = patch.aj().c_str();
1372 if (name_ai[0] == '-')
1374 aj = search_res_atom(++name_ai, end_ter, pdba, "check", TRUE);
1375 ai = search_res_atom(name_aj, start_ter, pdba, "check", TRUE);
1377 else if (name_aj[0] == '-')
1379 aj = search_res_atom(++name_aj, end_ter, pdba, "check", TRUE);
1380 ai = search_res_atom(name_ai, start_ter, pdba, "check", TRUE);
1382 if (ai >= 0 && aj >= 0)
1388 if (!(ai >= 0 && aj >= 0))
1390 rtpname = *(pdba->resinfo[end_ter].rtp);
1391 newName = search_resrename(rr, rtpname, false, false, false);
1392 if (newName.empty())
1396 res = getDatabaseEntry(newName, rtpFFDB);
1397 for (const auto& patch : res->rb[BondedTypes::Bonds].b)
1399 /* Seach forward bond for c/3' terminus */
1400 name_ai = patch.ai().c_str();
1401 name_aj = patch.aj().c_str();
1403 if (name_ai[0] == '+')
1405 ai = search_res_atom(name_aj, end_ter, pdba, "check", TRUE);
1406 aj = search_res_atom(++name_ai, start_ter, pdba, "check", TRUE);
1408 else if (name_aj[0] == '+')
1410 ai = search_res_atom(name_ai, end_ter, pdba, "check", TRUE);
1411 aj = search_res_atom(++name_aj, start_ter, pdba, "check", TRUE);
1413 if (ai >= 0 && aj >= 0)
1420 if (ai >= 0 && aj >= 0)
1422 real dist = distance2(pdbx[ai], pdbx[aj]);
1423 /* it is better to read bond length from ffbonded.itp */
1424 return (dist < gmx::square(long_bond_dist_) && dist > gmx::square(short_bond_dist_));
1435 char chainnum = ' ';
1438 bool bAllWat = false;
1440 std::vector<int> chainstart;
1447 bool bAllWat = false;
1449 std::vector<int> chainstart;
1450 std::vector<MoleculePatchDatabase*> ntdb;
1451 std::vector<MoleculePatchDatabase*> ctdb;
1452 std::vector<int> r_start;
1453 std::vector<int> r_end;
1455 std::vector<gmx::RVec> x;
1456 std::vector<int> cyclicBondsIndex;
1459 enum class VSitesType : int
1466 const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = {
1467 { "none", "hydrogens", "aromatics" }
1470 enum class WaterType : int
1482 const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1483 { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1486 enum class MergeType : int
1493 const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = {
1494 { "no", "all", "interactive" }
1505 class pdb2gmx : public ICommandLineOptionsModule
1511 bVsiteAromatics_(FALSE),
1512 chainSeparation_(ChainSeparationType::IdOrTer),
1513 vsitesType_(VSitesType::None),
1514 waterType_(WaterType::Select),
1515 mergeType_(MergeType::No),
1519 gmx::LoggerBuilder builder;
1520 builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1521 builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1522 loggerOwner_ = std::make_unique<LoggerOwner>(builder.build());
1525 // From ICommandLineOptionsModule
1526 void init(CommandLineModuleSettings* /*settings*/) override {}
1528 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1530 void optionsFinished() override;
1548 bool bAllowMissing_;
1552 bool bChargeGroups_;
1562 bool bVsiteAromatics_;
1566 real long_bond_dist_;
1567 real short_bond_dist_;
1569 std::string indexOutputFile_;
1570 std::string outputFile_;
1571 std::string topologyFile_;
1572 std::string includeTopologyFile_;
1573 std::string outputConfFile_;
1574 std::string inputConfFile_;
1575 std::string outFile_;
1578 ChainSeparationType chainSeparation_;
1579 VSitesType vsitesType_;
1580 WaterType waterType_;
1581 MergeType mergeType_;
1584 char forcefield_[STRLEN];
1585 char ffdir_[STRLEN];
1588 std::vector<std::string> incls_;
1589 std::vector<t_mols> mols_;
1591 std::unique_ptr<LoggerOwner> loggerOwner_;
1594 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1596 const char* desc[] = {
1597 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1598 "some database files, adds hydrogens to the molecules and generates",
1599 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1600 "and a topology in GROMACS format.",
1601 "These files can subsequently be processed to generate a run input file.",
1603 "[THISMODULE] will search for force fields by looking for",
1604 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1605 "of the current working directory and of the GROMACS library directory",
1606 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1608 "By default the forcefield selection is interactive,",
1609 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1610 "in the list on the command line instead. In that case [THISMODULE] just looks",
1611 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1613 "After choosing a force field, all files will be read only from",
1614 "the corresponding force field directory.",
1615 "If you want to modify or add a residue types, you can copy the force",
1616 "field directory from the GROMACS library directory to your current",
1617 "working directory. If you want to add new protein residue types,",
1618 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1619 "or copy the whole library directory to a local directory and set",
1620 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1621 "Check Chapter 5 of the manual for more information about file formats.",
1624 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1625 "need not necessarily contain a protein structure. Every kind of",
1626 "molecule for which there is support in the database can be converted.",
1627 "If there is no support in the database, you can add it yourself.[PAR]",
1629 "The program has limited intelligence, it reads a number of database",
1630 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1631 "if necessary this can be done manually. The program can prompt the",
1632 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1633 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1634 "protonated (three protons, default), for Asp and Glu unprotonated",
1635 "(default) or protonated, for His the proton can be either on ND1,",
1636 "on NE2 or on both. By default these selections are done automatically.",
1637 "For His, this is based on an optimal hydrogen bonding",
1638 "conformation. Hydrogen bonds are defined based on a simple geometric",
1639 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1640 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1641 "[TT]-dist[tt] respectively.[PAR]",
1643 "The protonation state of N- and C-termini can be chosen interactively",
1644 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1645 "respectively. Some force fields support zwitterionic forms for chains of",
1646 "one residue, but for polypeptides these options should NOT be selected.",
1647 "The AMBER force fields have unique forms for the terminal residues,",
1648 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1649 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1650 "respectively to use these forms, making sure you preserve the format",
1651 "of the coordinate file. Alternatively, use named terminating residues",
1652 "(e.g. ACE, NME).[PAR]",
1654 "The separation of chains is not entirely trivial since the markup",
1655 "in user-generated PDB files frequently varies and sometimes it",
1656 "is desirable to merge entries across a TER record, for instance",
1657 "if you want a disulfide bridge or distance restraints between",
1658 "two protein chains or if you have a HEME group bound to a protein.",
1659 "In such cases multiple chains should be contained in a single",
1660 "[TT]moleculetype[tt] definition.",
1661 "To handle this, [THISMODULE] uses two separate options.",
1662 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1663 "start, and termini added when applicable. This can be done based on the",
1664 "existence of TER records, when the chain id changes, or combinations of either",
1665 "or both of these. You can also do the selection fully interactively.",
1666 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1667 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1668 "This can be turned off (no merging), all non-water chains can be merged into a",
1669 "single molecule, or the selection can be done interactively.[PAR]",
1671 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1672 "If any of the occupancies are not one, indicating that the atom is",
1673 "not resolved well in the structure, a warning message is issued.",
1674 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1675 "all occupancy fields may be zero. Either way, it is up to the user",
1676 "to verify the correctness of the input data (read the article!).[PAR]",
1678 "During processing the atoms will be reordered according to GROMACS",
1679 "conventions. With [TT]-n[tt] an index file can be generated that",
1680 "contains one group reordered in the same way. This allows you to",
1681 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1682 "one limitation: reordering is done after the hydrogens are stripped",
1683 "from the input and before new hydrogens are added. This means that",
1684 "you should not use [TT]-ignh[tt].[PAR]",
1686 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1687 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1688 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1691 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1692 "motions. Angular and out-of-plane motions can be removed by changing",
1693 "hydrogens into virtual sites and fixing angles, which fixes their",
1694 "position relative to neighboring atoms. Additionally, all atoms in the",
1695 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1696 "can be converted into virtual sites, eliminating the fast improper dihedral",
1697 "fluctuations in these rings (but this feature is deprecated).",
1698 "[BB]Note[bb] that in this case all other hydrogen",
1699 "atoms are also converted to virtual sites. The mass of all atoms that are",
1700 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1701 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1702 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1703 "done for water hydrogens to slow down the rotational motion of water.",
1704 "The increase in mass of the hydrogens is subtracted from the bonded",
1705 "(heavy) atom so that the total mass of the system remains the same.",
1707 "As a special case, ring-closed (or cyclic) molecules are considered.",
1708 "[THISMODULE] automatically determines if a cyclic molecule is present",
1709 "by evaluating the distance between the terminal atoms of a given chain.",
1710 "If this distance is greater than the [TT]-sb[tt]",
1711 "(\"Short bond warning distance\", default 0.05 nm)",
1712 "and less than the [TT]-lb[tt] (\"Long bond warning distance\", default 0.25 nm)",
1713 "the molecule is considered to be ring closed and will be processed as such.",
1714 "Please note that this does not detect cyclic bonds over periodic boundaries."
1717 settings->setHelpText(desc);
1719 options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1720 "Write the residue database in new format to [TT]new.rtp[tt]"));
1722 RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1724 RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1725 options->addOption(EnumOption<ChainSeparationType>("chainsep")
1726 .enumValue(c_chainSeparationTypeNames)
1727 .store(&chainSeparation_)
1728 .description("Condition in PDB files when a new chain should be "
1729 "started (adding termini)"));
1730 options->addOption(EnumOption<MergeType>("merge")
1731 .enumValue(c_mergeTypeNames)
1733 .description("Merge multiple chains into a single [moleculetype]"));
1734 options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1735 "Force field, interactive by default. Use [TT]-h[tt] for information."));
1737 EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1738 options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1739 "Set the next 8 options to interactive"));
1740 options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1741 "Interactive SS bridge selection"));
1742 options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1743 "Interactive termini selection, instead of charged (default)"));
1744 options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1745 "Interactive lysine selection, instead of charged"));
1746 options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1747 "Interactive arginine selection, instead of charged"));
1748 options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1749 "Interactive aspartic acid selection, instead of charged"));
1750 options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1751 "Interactive glutamic acid selection, instead of charged"));
1752 options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1753 "Interactive glutamine selection, instead of charged"));
1754 options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1755 "Interactive histidine selection, instead of checking H-bonds"));
1756 options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1757 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1759 RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1760 options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1761 "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1763 options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1764 "Sort the residues according to database, turning this off is dangerous as charge "
1765 "groups might be broken in parts"));
1767 BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1769 BooleanOption("missing")
1770 .store(&bAllowMissing_)
1771 .defaultValue(false)
1773 "Continue when atoms are missing and bonds cannot be made, dangerous"));
1775 BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1777 RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1778 options->addOption(EnumOption<VSitesType>("vsite")
1779 .store(&vsitesType_)
1780 .enumValue(c_vsitesTypeNames)
1781 .description("Convert atoms to virtual sites"));
1782 options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1783 "Make hydrogen atoms heavy"));
1785 BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1786 options->addOption(BooleanOption("chargegrp")
1787 .store(&bChargeGroups_)
1789 .description("Use charge groups in the [REF].rtp[ref] file"));
1790 options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1791 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1792 options->addOption(BooleanOption("renum")
1794 .defaultValue(false)
1795 .description("Renumber the residues consecutively in the output"));
1796 options->addOption(BooleanOption("rtpres")
1797 .store(&bRTPresname_)
1798 .defaultValue(false)
1799 .description("Use [REF].rtp[ref] entry names as residue names"));
1800 options->addOption(FileNameOption("f")
1803 .store(&inputConfFile_)
1805 .defaultBasename("protein")
1807 .description("Structure file"));
1808 options->addOption(FileNameOption("o")
1811 .store(&outputConfFile_)
1813 .defaultBasename("conf")
1814 .description("Structure file"));
1815 options->addOption(FileNameOption("p")
1818 .store(&topologyFile_)
1820 .defaultBasename("topol")
1821 .description("Topology file"));
1822 options->addOption(FileNameOption("i")
1825 .store(&includeTopologyFile_)
1827 .defaultBasename("posre")
1828 .description("Include file for topology"));
1829 options->addOption(FileNameOption("n")
1832 .store(&indexOutputFile_)
1833 .storeIsSet(&bIndexSet_)
1834 .defaultBasename("index")
1835 .description("Index file"));
1836 options->addOption(FileNameOption("q")
1840 .storeIsSet(&bOutputSet_)
1841 .defaultBasename("clean")
1843 .description("Structure file"));
1846 void pdb2gmx::optionsFinished()
1848 if (inputConfFile_.empty())
1850 GMX_THROW(InconsistentInputError("You must supply an input file"));
1854 /* if anything changes here, also change description of -inter */
1869 else if (bDeuterate_)
1878 /* Force field selection, interactive or direct */
1879 choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(),
1881 sizeof(forcefield_),
1884 loggerOwner_->logger());
1886 if (strlen(forcefield_) > 0)
1888 ffname_ = forcefield_;
1889 ffname_[0] = std::toupper(ffname_[0]);
1893 gmx_fatal(FARGS, "Empty forcefield string");
1899 char select[STRLEN];
1900 std::vector<DisulfideBond> ssbonds;
1904 const char* prev_atomname;
1905 const char* this_atomname;
1906 const char* prev_resname;
1907 const char* this_resname;
1912 int prev_chainnumber;
1913 int this_chainnumber;
1914 int this_chainstart;
1915 int prev_chainstart;
1917 const gmx::MDLogger& logger = loggerOwner_->logger();
1919 GMX_LOG(logger.info)
1921 .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1923 choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
1925 switch (vsitesType_)
1927 case VSitesType::None:
1929 bVsiteAromatics_ = false;
1931 case VSitesType::Hydrogens:
1933 bVsiteAromatics_ = false;
1935 case VSitesType::Aromatics:
1937 bVsiteAromatics_ = true;
1939 case VSitesType::Count:
1940 gmx_fatal(FARGS, "Internal inconsistency: VSitesType is out of range.");
1943 /* Open the symbol table */
1945 open_symtab(&symtab);
1947 /* Residue type database */
1950 /* Read residue renaming database(s), if present */
1951 std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
1953 std::vector<RtpRename> rtprename;
1954 for (const auto& filename : rrn)
1956 GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
1957 FILE* fp = fflib_open(filename);
1958 read_rtprename(filename.c_str(), fp, &rtprename);
1962 /* Add all alternative names from the residue renaming database to the list
1963 of recognized amino/nucleic acids. */
1964 for (const auto& rename : rtprename)
1966 /* Only add names if the 'standard' gromacs/iupac base name was found */
1967 if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
1969 rt.addResidue(rename.main, *restype);
1970 rt.addResidue(rename.nter, *restype);
1971 rt.addResidue(rename.cter, *restype);
1972 rt.addResidue(rename.bter, *restype);
1979 if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
1983 else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
1993 char* title = nullptr;
1997 int natom = read_pdball(inputConfFile_.c_str(),
2014 std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
2015 GMX_THROW(InconsistentInputError(message));
2018 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
2019 int nwaterchain = 0;
2021 modify_chain_numbers(&pdba_all, chainSeparation_, logger);
2023 int nchainmerges = 0;
2025 this_atomname = nullptr;
2027 this_resname = nullptr;
2030 this_chainnumber = -1;
2031 this_chainstart = 0;
2032 /* Keep the compiler happy */
2033 prev_chainstart = 0;
2036 std::vector<t_pdbchain> pdb_ch;
2039 bool bMerged = false;
2040 for (int i = 0; (i < natom); i++)
2042 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
2044 /* TODO this should live in a helper object, and consolidate
2045 that with code in modify_chain_numbers */
2046 prev_atomname = this_atomname;
2047 prev_atomnum = this_atomnum;
2048 prev_resname = this_resname;
2049 prev_resnum = this_resnum;
2050 prev_chainid = this_chainid;
2051 prev_chainnumber = this_chainnumber;
2054 prev_chainstart = this_chainstart;
2057 this_atomname = *pdba_all.atomname[i];
2058 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
2059 this_resname = *ri->name;
2060 this_resnum = ri->nr;
2061 this_chainid = ri->chainid;
2062 this_chainnumber = ri->chainnum;
2064 bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
2066 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
2070 "Must have pdbinfo from reading a PDB file if chain number is changing");
2071 this_chainstart = pdba_all.atom[i].resind;
2073 if (i > 0 && !bWat_)
2075 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
2077 GMX_LOG(logger.info)
2079 .appendTextFormatted(
2080 "Merge chain ending with residue %s%d (chain id '%c', atom %d "
2081 "%s) and chain starting with "
2082 "residue %s%d (chain id '%c', atom %d %s) into a single "
2083 "moleculetype (keeping termini)? [n/y]",
2095 if (nullptr == fgets(select, STRLEN - 1, stdin))
2097 gmx_fatal(FARGS, "Error reading from stdin");
2099 bMerged = (select[0] == 'y');
2101 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
2109 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
2110 pdba_all.atom[i].resind - prev_chainstart;
2111 pdb_ch[numChains - 1].nterpairs++;
2112 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
2117 /* set natom for previous chain */
2120 pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
2127 /* check if chain identifier was used before */
2128 for (int j = 0; (j < numChains); j++)
2130 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
2132 GMX_LOG(logger.warning)
2134 .appendTextFormatted(
2135 "Chain identifier '%c' is used in two non-sequential "
2137 "They will be treated as separate chains unless you "
2138 "reorder your file.",
2142 t_pdbchain newChain;
2143 newChain.chainid = ri->chainid;
2144 newChain.chainnum = ri->chainnum;
2146 newChain.bAllWat = bWat_;
2149 newChain.nterpairs = 0;
2153 newChain.nterpairs = 1;
2155 newChain.chainstart.resize(newChain.nterpairs + 1);
2156 /* modified [numChains] to [0] below */
2157 newChain.chainstart[0] = 0;
2158 pdb_ch.push_back(newChain);
2164 pdb_ch.back().natom = natom - pdb_ch.back().start;
2166 /* set all the water blocks at the end of the chain */
2167 std::vector<int> swap_index(numChains);
2169 for (int i = 0; i < numChains; i++)
2171 if (!pdb_ch[i].bAllWat)
2177 for (int i = 0; i < numChains; i++)
2179 if (pdb_ch[i].bAllWat)
2185 if (nwaterchain > 1)
2187 GMX_LOG(logger.info)
2189 .appendTextFormatted("Moved all the water blocks to the end");
2193 std::vector<t_chain> chains(numChains);
2194 /* copy pdb data and x for all chains */
2195 for (int i = 0; (i < numChains); i++)
2197 int si = swap_index[i];
2198 chains[i].chainid = pdb_ch[si].chainid;
2199 chains[i].chainnum = pdb_ch[si].chainnum;
2200 chains[i].bAllWat = pdb_ch[si].bAllWat;
2201 chains[i].nterpairs = pdb_ch[si].nterpairs;
2202 chains[i].chainstart = pdb_ch[si].chainstart;
2203 chains[i].ntdb.clear();
2204 chains[i].ctdb.clear();
2205 chains[i].r_start.resize(pdb_ch[si].nterpairs);
2206 chains[i].r_end.resize(pdb_ch[si].nterpairs);
2208 snew(chains[i].pdba, 1);
2209 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2210 for (j = 0; j < chains[i].pdba->nr; j++)
2212 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start + j];
2213 chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2214 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2215 chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2217 /* Re-index the residues assuming that the indices are continuous */
2218 int k = chains[i].pdba->atom[0].resind;
2219 int nres = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2220 chains[i].pdba->nres = nres;
2221 for (int j = 0; j < chains[i].pdba->nr; j++)
2223 chains[i].pdba->atom[j].resind -= k;
2225 srenew(chains[i].pdba->resinfo, nres);
2226 for (int j = 0; j < nres; j++)
2228 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k + j];
2229 chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2230 /* make all chain identifiers equal to that of the chain */
2231 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2235 if (nchainmerges > 0)
2237 GMX_LOG(logger.info)
2239 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2243 GMX_LOG(logger.info)
2245 .appendTextFormatted(
2246 "There are %d chains and %d blocks of water and "
2247 "%d residues with %d atoms",
2248 numChains - nwaterchain,
2253 GMX_LOG(logger.info)
2255 .appendTextFormatted(" %5s %4s %6s", "chain", "#res", "#atoms");
2256 for (int i = 0; (i < numChains); i++)
2258 GMX_LOG(logger.info)
2260 .appendTextFormatted(" %d '%c' %5d %6d %s\n",
2262 chains[i].chainid ? chains[i].chainid : '-',
2263 chains[i].pdba->nres,
2265 chains[i].bAllWat ? "(only water)" : "");
2268 check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2270 /* Read atomtypes... */
2271 PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2273 /* read residue database */
2274 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2275 std::vector<std::string> rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2276 std::vector<PreprocessResidue> rtpFFDB;
2277 for (const auto& filename : rtpf)
2279 readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2283 /* Not correct with multiple rtp input files with different bonded types */
2284 FILE* fp = gmx_fio_fopen("new.rtp", "w");
2285 print_resall(fp, rtpFFDB, atype);
2289 /* read hydrogen database */
2290 std::vector<MoleculePatchDatabase> ah;
2291 read_h_db(ffdir_, &ah);
2293 /* Read Termini database... */
2294 std::vector<MoleculePatchDatabase> ntdb;
2295 std::vector<MoleculePatchDatabase> ctdb;
2296 std::vector<MoleculePatchDatabase*> tdblist;
2297 int nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2298 int nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2300 FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2302 print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2305 std::vector<gmx::RVec> x;
2306 /* new pdb datastructure for sorting. */
2307 t_atoms** sortAtoms = nullptr;
2308 t_atoms** localAtoms = nullptr;
2309 snew(sortAtoms, numChains);
2310 snew(localAtoms, numChains);
2311 for (int chain = 0; (chain < numChains); chain++)
2313 cc = &(chains[chain]);
2315 /* set pdba, natom and nres to the current chain */
2318 natom = cc->pdba->nr;
2319 int nres = cc->pdba->nres;
2321 if (cc->chainid && (cc->chainid != ' '))
2323 GMX_LOG(logger.info)
2325 .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2333 GMX_LOG(logger.info)
2335 .appendTextFormatted(
2336 "Processing chain %d (%d atoms, %d residues)", chain + 1, natom, nres);
2339 process_chain(logger,
2356 cc->chainstart[cc->nterpairs] = pdba->nres;
2358 for (int i = 0; i < cc->nterpairs; i++)
2361 pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]), &(cc->r_end[j]), &rt, logger);
2362 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2364 if (checkChainCyclicity(
2365 pdba, pdbx, cc->r_start[j], cc->r_end[j], rtpFFDB, rtprename, long_bond_dist_, short_bond_dist_))
2367 cc->cyclicBondsIndex.push_back(cc->r_start[j]);
2368 cc->cyclicBondsIndex.push_back(cc->r_end[j]);
2369 cc->r_start[j] = -1;
2379 if (cc->nterpairs == 0 && cc->cyclicBondsIndex.empty())
2381 GMX_LOG(logger.info)
2383 .appendTextFormatted(
2384 "Problem with chain definition, or missing terminal residues. "
2385 "This chain does not appear to contain a recognized chain molecule. "
2386 "If this is incorrect, you can edit residuetypes.dat to modify the "
2390 /* Check for disulfides and other special bonds */
2391 ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2393 if (!rtprename.empty())
2395 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2398 for (int i = 0; i < cc->nterpairs; i++)
2401 * We first apply a filter so we only have the
2402 * termini that can be applied to the residue in question
2403 * (or a generic terminus if no-residue specific is available).
2405 /* First the N terminus */
2408 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2409 if (tdblist.empty())
2411 GMX_LOG(logger.info)
2413 .appendTextFormatted(
2414 "No suitable end (N or 5') terminus found in database - "
2415 "assuming this residue "
2416 "is already in a terminus-specific form and skipping terminus "
2418 cc->ntdb.push_back(nullptr);
2422 if (bTerMan_ && !tdblist.empty())
2425 "Select start terminus type for %s-%d",
2426 *pdba->resinfo[cc->r_start[i]].name,
2427 pdba->resinfo[cc->r_start[i]].nr);
2428 cc->ntdb.push_back(choose_ter(tdblist, select));
2432 cc->ntdb.push_back(tdblist[0]);
2435 printf("Start terminus %s-%d: %s\n",
2436 *pdba->resinfo[cc->r_start[i]].name,
2437 pdba->resinfo[cc->r_start[i]].nr,
2438 (cc->ntdb[i])->name.c_str());
2444 cc->ntdb.push_back(nullptr);
2447 /* And the C terminus */
2450 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2451 if (tdblist.empty())
2453 GMX_LOG(logger.info)
2455 .appendTextFormatted(
2456 "No suitable end (C or 3') terminus found in database - "
2457 "assuming this residue"
2458 "is already in a terminus-specific form and skipping terminus "
2460 cc->ctdb.push_back(nullptr);
2464 if (bTerMan_ && !tdblist.empty())
2467 "Select end terminus type for %s-%d",
2468 *pdba->resinfo[cc->r_end[i]].name,
2469 pdba->resinfo[cc->r_end[i]].nr);
2470 cc->ctdb.push_back(choose_ter(tdblist, select));
2474 cc->ctdb.push_back(tdblist[0]);
2476 printf("End terminus %s-%d: %s\n",
2477 *pdba->resinfo[cc->r_end[i]].name,
2478 pdba->resinfo[cc->r_end[i]].nr,
2479 (cc->ctdb[i])->name.c_str());
2485 cc->ctdb.push_back(nullptr);
2488 std::vector<MoleculePatchDatabase> hb_chain;
2489 /* lookup hackblocks and rtp for all residues */
2490 std::vector<PreprocessResidue> restp_chain;
2491 get_hackblocks_rtp(&hb_chain,
2504 /* ideally, now we would not need the rtp itself anymore, but do
2505 everything using the hb and restp arrays. Unfortunately, that
2506 requires some re-thinking of code in gen_vsite.c, which I won't
2507 do now :( AF 26-7-99 */
2509 rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2511 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2516 t_blocka* block = new_blocka();
2518 sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2519 remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2524 GMX_LOG(logger.warning)
2526 .appendTextFormatted(
2527 "With the -remh option the generated "
2528 "index file (%s) might be useless "
2529 "(the index file is generated before hydrogens are added)",
2530 indexOutputFile_.c_str());
2532 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2534 for (int i = 0; i < block->nr; i++)
2544 GMX_LOG(logger.warning)
2546 .appendTextFormatted(
2547 "Without sorting no check for duplicate atoms can be done");
2550 /* Generate Hydrogen atoms (and termini) in the sequence */
2551 GMX_LOG(logger.info)
2553 .appendTextFormatted(
2554 "Generating any missing hydrogen atoms and/or adding termini.");
2566 cc->cyclicBondsIndex);
2567 GMX_LOG(logger.info)
2569 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2571 /* make up molecule name(s) */
2573 int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2575 std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2577 std::string molname;
2585 this_chainid = cc->chainid;
2587 /* Add the chain id if we have one */
2588 if (this_chainid != ' ')
2590 suffix.append(formatString("_chain_%c", this_chainid));
2593 /* Check if there have been previous chains with the same id */
2595 for (int k = 0; k < chain; k++)
2597 if (cc->chainid == chains[k].chainid)
2602 /* Add the number for this chain identifier if there are multiple copies */
2605 suffix.append(formatString("%d", nid_used + 1));
2608 if (suffix.length() > 0)
2610 molname.append(restype);
2611 molname.append(suffix);
2618 std::string itp_fn = topologyFile_;
2620 std::string posre_fn = includeTopologyFile_;
2621 if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2624 printf("Chain time...\n");
2625 // construct the itp file name
2626 itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2628 itp_fn.append(molname);
2629 itp_fn.append(".itp");
2630 // now do the same for posre
2631 posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2632 posre_fn.append("_");
2633 posre_fn.append(molname);
2634 posre_fn.append(".itp");
2635 if (posre_fn == itp_fn)
2637 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2639 incls_.emplace_back();
2640 incls_.back() = itp_fn;
2641 itp_file_ = gmx_fio_fopen(itp_fn.c_str(), "w");
2648 mols_.emplace_back();
2651 mols_.back().name = "SOL";
2652 mols_.back().nr = pdba->nres;
2656 mols_.back().name = molname;
2657 mols_.back().nr = 1;
2662 print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2668 top_file2 = nullptr;
2672 top_file2 = itp_file_;
2676 top_file2 = top_file;
2702 cc->cyclicBondsIndex,
2707 write_posres(posre_fn.c_str(), pdba, posre_fc_);
2712 gmx_fio_fclose(itp_file_);
2715 /* pdba and natom have been reassigned somewhere so: */
2720 if (watermodel_ == nullptr)
2722 for (int chain = 0; chain < numChains; chain++)
2724 if (chains[chain].bAllWat)
2726 auto message = formatString(
2727 "You have chosen not to include a water model, "
2728 "but there is water in the input file. Select a "
2729 "water model or remove the water from your input file.");
2730 GMX_THROW(InconsistentInputError(message));
2736 std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2737 if (!fflib_fexist(waterFile))
2739 auto message = formatString(
2740 "The topology file '%s' for the selected water "
2741 "model '%s' can not be found in the force field "
2742 "directory. Select a different water model.",
2745 GMX_THROW(InconsistentInputError(message));
2749 print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2750 gmx_fio_fclose(top_file);
2752 /* now merge all chains back together */
2755 for (int i = 0; (i < numChains); i++)
2757 natom += chains[i].pdba->nr;
2758 nres += chains[i].pdba->nres;
2762 init_t_atoms(atoms, natom, false);
2763 for (int i = 0; i < atoms->nres; i++)
2765 sfree(atoms->resinfo[i].name);
2768 srenew(atoms->resinfo, nres);
2772 for (int i = 0; (i < numChains); i++)
2776 GMX_LOG(logger.info)
2778 .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2781 chains[i].pdba->nres);
2783 for (int j = 0; (j < chains[i].pdba->nr); j++)
2785 atoms->atom[k] = chains[i].pdba->atom[j];
2786 atoms->atom[k].resind += l; /* l is processed nr of residues */
2787 atoms->atomname[k] = chains[i].pdba->atomname[j];
2788 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2789 x.push_back(chains[i].x[j]);
2792 for (int j = 0; (j < chains[i].pdba->nres); j++)
2794 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2797 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2801 done_atom(chains[i].pdba);
2806 GMX_LOG(logger.info)
2808 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2809 print_sums(atoms, true, logger);
2813 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2814 clear_rvec(box_space);
2817 make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2820 outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr, pbcType, box);
2822 done_symtab(&symtab);
2823 done_atom(&pdba_all);
2825 for (int chain = 0; chain < numChains; chain++)
2827 sfree(sortAtoms[chain]);
2828 sfree(localAtoms[chain]);
2836 GMX_LOG(logger.info)
2838 .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2839 GMX_LOG(logger.info)
2841 .appendTextFormatted("You have successfully generated a topology from: %s.",
2842 inputConfFile_.c_str());
2843 if (watermodel_ != nullptr)
2845 GMX_LOG(logger.info)
2847 .appendTextFormatted(
2848 "The %s force field and the %s water model are used.", ffname_, watermodel_);
2853 GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2855 GMX_LOG(logger.info)
2857 .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2864 const char pdb2gmxInfo::name[] = "pdb2gmx";
2865 const char pdb2gmxInfo::shortDescription[] =
2866 "Convert coordinate files to topology and FF-compliant coordinate files";
2867 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2869 return std::make_unique<pdb2gmx>();