2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/commandline/cmdlineoptionsmodule.h"
53 #include "gromacs/fileio/confio.h"
54 #include "gromacs/fileio/filetypes.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/pdbio.h"
57 #include "gromacs/gmxlib/conformation_utilities.h"
58 #include "gromacs/gmxpreprocess/fflibutil.h"
59 #include "gromacs/gmxpreprocess/genhydro.h"
60 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
61 #include "gromacs/gmxpreprocess/grompp_impl.h"
62 #include "gromacs/gmxpreprocess/h_db.h"
63 #include "gromacs/gmxpreprocess/hizzie.h"
64 #include "gromacs/gmxpreprocess/pdb2top.h"
65 #include "gromacs/gmxpreprocess/pgutil.h"
66 #include "gromacs/gmxpreprocess/specbond.h"
67 #include "gromacs/gmxpreprocess/ter_db.h"
68 #include "gromacs/gmxpreprocess/toputil.h"
69 #include "gromacs/gmxpreprocess/xlate.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/options/basicoptions.h"
72 #include "gromacs/options/filenameoption.h"
73 #include "gromacs/options/ioptionscontainer.h"
74 #include "gromacs/topology/atomprop.h"
75 #include "gromacs/topology/block.h"
76 #include "gromacs/topology/index.h"
77 #include "gromacs/topology/residuetypes.h"
78 #include "gromacs/topology/symtab.h"
79 #include "gromacs/topology/topology.h"
80 #include "gromacs/utility/dir_separator.h"
81 #include "gromacs/utility/exceptions.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/filestream.h"
84 #include "gromacs/utility/loggerbuilder.h"
85 #include "gromacs/utility/path.h"
86 #include "gromacs/utility/smalloc.h"
87 #include "gromacs/utility/strdb.h"
88 #include "gromacs/utility/stringutil.h"
90 #include "hackblock.h"
95 RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
113 const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
115 /* NOTE: This function returns the main building block name,
116 * it does not take terminal renaming into account.
118 auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
119 return gmx::equalCaseInsensitive(name, rename.gmx);
121 return found != rr.end() ? found->main.c_str() : name.c_str();
124 const char* select_res(int nr,
129 gmx::ArrayRef<const RtpRename> rr)
131 printf("Which %s type do you want for residue %d\n", title, resnr + 1);
132 for (int sel = 0; (sel < nr); sel++)
134 printf("%d. %s (%s)\n", sel, expl[sel], res2bb_notermini(name[sel], rr));
136 printf("\nType a number:");
140 if (scanf("%d", &userSelection) != 1)
142 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
145 return name[userSelection];
148 const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
156 const char* lh[easpNR] = { "ASP", "ASPH" };
157 const char* expl[easpNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
159 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", rr);
162 const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
170 const char* lh[egluNR] = { "GLU", "GLUH" };
171 const char* expl[egluNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
173 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", rr);
176 const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
184 const char* lh[eglnNR] = { "GLN", "QLN" };
185 const char* expl[eglnNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
187 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", rr);
190 const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
198 const char* lh[elysNR] = { "LYSN", "LYS" };
199 const char* expl[elysNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
201 return select_res(elysNR, resnr, lh, expl, "LYSINE", rr);
204 const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
212 const char* lh[eargNR] = { "ARGN", "ARG" };
213 const char* expl[eargNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
215 return select_res(eargNR, resnr, lh, expl, "ARGININE", rr);
218 const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
220 const char* expl[ehisNR] = { "H on ND1 only", "H on NE2 only", "H on ND1 and NE2",
223 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", rr);
226 void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
228 char line[STRLEN], buf[STRLEN];
231 while (get_a_line(fp, line, STRLEN))
233 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
234 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
235 * Note that the buffer length has been increased to 7 to allow this,
236 * so we just need to make sure the strings have zero-length initially.
248 int nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
249 RtpRename newEntry(gmx, main, nter, cter, bter);
252 if (nc != 2 && nc != 5)
254 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d or %d",
262 "A line in residue renaming database '%s' has %d columns, while previous "
263 "lines have %d columns",
269 /* This file does not have special termini names, copy them from main */
270 newEntry.nter = newEntry.main;
271 newEntry.cter = newEntry.main;
272 newEntry.bter = newEntry.main;
274 rtprename->push_back(newEntry);
278 std::string search_resrename(gmx::ArrayRef<const RtpRename> rr, const char* name, bool bStart, bool bEnd, bool bCompareFFRTPname)
280 auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
281 return ((!bCompareFFRTPname && (name == rename.gmx))
282 || (bCompareFFRTPname && (name == rename.main)));
286 /* If found in the database, rename this residue's rtp building block,
287 * otherwise keep the old name.
289 if (found != rr.end())
293 newName = found->bter;
297 newName = found->nter;
301 newName = found->cter;
305 newName = found->main;
308 if (newName[0] == '-')
310 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name,
311 bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
312 : (bEnd ? " as an ending terminus" : ""));
319 void rename_resrtp(t_atoms* pdba,
321 gmx::ArrayRef<const int> r_start,
322 gmx::ArrayRef<const int> r_end,
323 gmx::ArrayRef<const RtpRename> rr,
326 const gmx::MDLogger& logger)
328 bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
330 for (int r = 0; r < pdba->nres; r++)
334 for (int j = 0; j < nterpairs; j++)
341 for (int j = 0; j < nterpairs; j++)
349 std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
351 if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
353 /* This is a terminal residue, but the residue name,
354 * currently stored in .rtp, is not a standard residue name,
355 * but probably a force field specific rtp name.
356 * Check if we need to rename it because it is terminal.
358 newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
361 if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
367 .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
368 pdba->resinfo[r].nr, *pdba->resinfo[r].name,
371 pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
376 void pdbres_to_gmxrtp(t_atoms* pdba)
380 for (i = 0; (i < pdba->nres); i++)
382 if (pdba->resinfo[i].rtp == nullptr)
384 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
389 void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
394 for (i = 0; (i < pdba->nres); i++)
396 resnm = *pdba->resinfo[i].name;
397 if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
398 || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
400 /* Rename the residue name (not the rtp name) */
401 pdba->resinfo[i].name = put_symtab(symtab, newnm);
406 /*! \brief Rename all residues named \c oldnm to \c newnm
408 * Search for residues for which the residue name from the input
409 * configuration file matches \c oldnm, and when found choose the rtp
410 * entry and name of \c newnm.
412 * \todo Refactor this function to accept a lambda that accepts i and
413 * numMatchesFound but always produces \c newnm. Then remove
414 * renameResiduesInteractively by calling this method with suitable
415 * lambdas that capture its parameter \c rr and ignores
416 * numMatchesFound. */
417 void renameResidue(const gmx::MDLogger& logger,
424 int numMatchesFound = 0;
425 for (int i = 0; (i < pdba->nres); i++)
427 /* We have not set the rtp name yet, use the residue name */
428 const char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
429 if ((bFullCompare && (gmx::equalCaseInsensitive(residueNameInInputConfiguration, oldnm)))
430 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
432 /* Change the rtp building block name */
433 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
434 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
438 if (numMatchesFound > 0)
442 .appendTextFormatted(
443 "Replaced %d residue%s named %s to the default %s. Use interactive "
444 "selection of protonated residues if that is what you need.",
445 numMatchesFound, numMatchesFound > 1 ? "s" : "", oldnm, newnm);
449 /*! \brief Rename all residues named \c oldnm according to the user's
452 * Search for residues for which the residue name from the input
453 * configuration file matches \c oldnm, and when found choose the rtp
454 * entry and name of the interactive choice from \c gettp.
456 * \todo Remove this function, per todo in \c renameResidue. */
457 void renameResidueInteractively(t_atoms* pdba,
459 const char* gettp(int, gmx::ArrayRef<const RtpRename>),
462 gmx::ArrayRef<const RtpRename> rr)
464 // Search for residues i for which the residue name from the input
465 // configuration file matches oldnm, so it can replaced by the rtp
466 // entry and name of newnm.
467 for (int i = 0; i < pdba->nres; i++)
469 /* We have not set the rtp name yet, use the residue name */
470 char* residueNameInInputConfiguration = *pdba->resinfo[i].name;
471 if ((bFullCompare && (strcmp(residueNameInInputConfiguration, oldnm) == 0))
472 || (!bFullCompare && strstr(residueNameInInputConfiguration, oldnm) != nullptr))
474 const char* interactiveRtpChoice = gettp(i, rr);
475 pdba->resinfo[i].rtp = put_symtab(symtab, interactiveRtpChoice);
476 pdba->resinfo[i].name = pdba->resinfo[i].rtp;
481 void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
487 ftp = fn2ftp(filename);
488 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
490 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
494 for (i = 0; (i < atoms->nr); i++)
496 if (atoms->pdbinfo[i].occup != 1)
500 GMX_LOG(logger.warning)
502 .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
503 *atoms->resinfo[atoms->atom[i].resind].name,
504 atoms->resinfo[atoms->atom[i].resind].nr,
505 *atoms->atomname[i], atoms->pdbinfo[i].occup);
507 if (atoms->pdbinfo[i].occup == 0)
517 if (nzero == atoms->nr)
519 GMX_LOG(logger.warning)
521 .appendTextFormatted(
522 "All occupancy fields zero. This is probably not an X-Ray structure");
524 else if ((nzero > 0) || (nnotone > 0))
526 GMX_LOG(logger.warning)
528 .appendTextFormatted(
529 "there were %d atoms with zero occupancy and %d atoms with "
530 " occupancy unequal to one (out of %d atoms). Check your pdb "
532 nzero, nnotone, atoms->nr);
536 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
541 void write_posres(const char* fn, t_atoms* pdba, real fc)
546 fp = gmx_fio_fopen(fn, "w");
548 "; In this topology include file, you will find position restraint\n"
549 "; entries for all the heavy atoms in your original pdb file.\n"
550 "; This means that all the protons which were added by pdb2gmx are\n"
551 "; not restrained.\n"
553 "[ position_restraints ]\n"
554 "; %4s%6s%8s%8s%8s\n",
555 "atom", "type", "fx", "fy", "fz");
556 for (i = 0; (i < pdba->nr); i++)
558 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
560 fprintf(fp, "%6d%6d %g %g %g\n", i + 1, 1, fc, fc, fc);
566 int read_pdball(const char* inf,
580 /* Read a pdb file. (containing proteins) */
582 int natom, new_natom, i;
585 printf("Reading %s...\n", inf);
586 readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
588 if (atoms->pdbinfo == nullptr)
590 snew(atoms->pdbinfo, atoms->nr);
592 if (fn2ftp(inf) == efPDB)
594 get_pdb_atomnumber(atoms, aps);
599 for (i = 0; i < atoms->nr; i++)
601 if (!is_hydrogen(*atoms->atomname[i]))
603 atoms->atom[new_natom] = atoms->atom[i];
604 atoms->atomname[new_natom] = atoms->atomname[i];
605 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
606 copy_rvec((*x)[i], (*x)[new_natom]);
610 atoms->nr = new_natom;
617 printf(" '%s',", *title);
619 printf(" %d atoms\n", natom);
621 /* Rename residues */
622 rename_pdbres(atoms, "HOH", watres, false, symtab);
623 rename_pdbres(atoms, "SOL", watres, false, symtab);
624 rename_pdbres(atoms, "WAT", watres, false, symtab);
626 rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose);
634 write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
640 void process_chain(const gmx::MDLogger& logger,
642 gmx::ArrayRef<gmx::RVec> x,
655 gmx::ArrayRef<const RtpRename> rr)
657 /* Rename aromatics, lys, asp and histidine */
660 renameResidue(logger, pdba, "TYR", "TYRU", false, symtab);
664 renameResidue(logger, pdba, "TRP", "TRPU", false, symtab);
668 renameResidue(logger, pdba, "PHE", "PHEU", false, symtab);
672 renameResidueInteractively(pdba, "LYS", get_lystp, false, symtab, rr);
676 renameResidueInteractively(pdba, "ARG", get_argtp, false, symtab, rr);
680 renameResidueInteractively(pdba, "GLN", get_glntp, false, symtab, rr);
684 renameResidueInteractively(pdba, "ASP", get_asptp, false, symtab, rr);
688 renameResidue(logger, pdba, "ASPH", "ASP", false, symtab);
692 renameResidueInteractively(pdba, "GLU", get_glutp, false, symtab, rr);
696 renameResidue(logger, pdba, "GLUH", "GLU", false, symtab);
701 set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
705 renameResidueInteractively(pdba, "HIS", get_histp, true, symtab, rr);
708 /* Initialize the rtp builing block names with the residue names
709 * for the residues that have not been processed above.
711 pdbres_to_gmxrtp(pdba);
713 /* Now we have all rtp names set.
714 * The rtp names will conform to Gromacs naming,
715 * unless the input pdb file contained one or more force field specific
716 * rtp names as residue names.
720 /* struct for sorting the atoms from the pdb file */
723 int resnr; /* residue number */
724 int j; /* database order index */
725 int index; /* original atom number */
726 char anm1; /* second letter of atom name */
727 char altloc; /* alternate location indicator */
730 bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
732 int d = (a.resnr - b.resnr);
738 d = (a.anm1 - b.anm1);
741 d = (a.altloc - b.altloc);
748 void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
751 t_atoms** newPdbAtoms,
752 std::vector<gmx::RVec>* x,
756 t_atoms* pdba = *pdbaptr;
757 std::vector<gmx::RVec> xnew;
764 for (int i = 0; i < natoms; i++)
766 atomnm = *pdba->atomname[i];
767 const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
769 std::find_if(localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
770 [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
771 if (found == localPpResidue->atomname.end())
773 std::string buf = gmx::formatString(
774 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
775 "while sorting atoms.\n%s",
776 atomnm, *pdba->resinfo[pdba->atom[i].resind].name,
777 pdba->resinfo[pdba->atom[i].resind].nr, localPpResidue->resname.c_str(),
778 localPpResidue->natom(),
780 ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
781 "might have had a different number in the PDB file and was rebuilt\n"
782 "(it might for instance have been H3, and we only expected H1 & "
784 "Note that hydrogens might have been added to the entry for the "
786 "Remove this hydrogen or choose a different protonation state to "
788 "Option -ignh will ignore all hydrogens in the input."
790 gmx_fatal(FARGS, "%s", buf.c_str());
792 /* make shadow array to be sorted into indexgroup */
793 pdbi[i].resnr = pdba->atom[i].resind;
794 pdbi[i].j = std::distance(localPpResidue->atomname.begin(), found);
796 pdbi[i].anm1 = atomnm[1];
797 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
799 std::sort(pdbi, pdbi + natoms, pdbicomp);
801 /* pdba is sorted in pdbnew using the pdbi index */
802 std::vector<int> a(natoms);
803 srenew(*newPdbAtoms, 1);
804 init_t_atoms((*newPdbAtoms), natoms, true);
805 (*newPdbAtoms)->nr = pdba->nr;
806 (*newPdbAtoms)->nres = pdba->nres;
807 srenew((*newPdbAtoms)->resinfo, pdba->nres);
808 std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
809 for (int i = 0; i < natoms; i++)
811 (*newPdbAtoms)->atom[i] = pdba->atom[pdbi[i].index];
812 (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
813 (*newPdbAtoms)->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
814 xnew.push_back(x->at(pdbi[i].index));
815 /* make indexgroup in block */
816 a[i] = pdbi[i].index;
821 /* copy the sorted pdbnew back to pdba */
822 *pdbaptr = *newPdbAtoms;
824 add_grp(block, gnames, a, "prot_sort");
828 int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
830 int i, j, oldnatoms, ndel;
833 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
834 oldnatoms = pdba->nr;
836 /* NOTE: pdba->nr is modified inside the loop */
837 for (i = 1; (i < pdba->nr); i++)
839 /* compare 'i' and 'i-1', throw away 'i' if they are identical
840 this is a 'while' because multiple alternate locations can be present */
841 while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
842 && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
847 ri = &pdba->resinfo[pdba->atom[i].resind];
850 .appendTextFormatted("deleting duplicate atom %4s %s%4d%c",
851 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
852 if (ri->chainid && (ri->chainid != ' '))
854 printf(" ch %c", ri->chainid);
858 if (pdba->pdbinfo[i].atomnr)
860 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
862 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
864 printf(" altloc %c", pdba->pdbinfo[i].altloc);
870 /* We can not free, since it might be in the symtab */
871 /* sfree(pdba->atomname[i]); */
872 for (j = i; j < pdba->nr; j++)
874 pdba->atom[j] = pdba->atom[j + 1];
875 pdba->atomname[j] = pdba->atomname[j + 1];
878 pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
880 copy_rvec(x[j + 1], x[j]);
882 srenew(pdba->atom, pdba->nr);
883 /* srenew(pdba->atomname, pdba->nr); */
884 srenew(pdba->pdbinfo, pdba->nr);
887 if (pdba->nr != oldnatoms)
891 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
897 void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
899 std::string startResidueString =
900 gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
901 std::string endResidueString =
902 gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
904 // Check whether all residues in chain have the same chain ID.
905 bool allResiduesHaveSameChainID = true;
906 char chainID0 = pdba->resinfo[r0].chainid;
908 std::string residueString;
910 for (int i = r0 + 1; i < r1; i++)
912 chainID = pdba->resinfo[i].chainid;
913 if (chainID != chainID0)
915 allResiduesHaveSameChainID = false;
916 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
921 if (!allResiduesHaveSameChainID)
924 "The chain covering the range %s--%s does not have a consistent chain ID. "
925 "The first residue has ID '%c', while residue %s has ID '%c'.",
926 startResidueString.c_str(), endResidueString.c_str(), chainID0,
927 residueString.c_str(), chainID);
930 // At this point all residues have the same ID. If they are also non-blank
931 // we can be a bit more aggressive and require the types match too.
934 bool allResiduesHaveSameType = true;
936 std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
938 for (int i = r0 + 1; i < r1; i++)
940 restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
941 if (!gmx::equalCaseInsensitive(restype, restype0))
943 allResiduesHaveSameType = false;
944 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
949 if (!allResiduesHaveSameType)
952 "The residues in the chain %s--%s do not have a consistent type. "
953 "The first residue has type '%s', while residue %s is of type '%s'. "
954 "Either there is a mistake in your chain, or it includes nonstandard "
955 "residue names that have not yet been added to the residuetypes.dat "
956 "file in the GROMACS library directory. If there are other molecules "
957 "such as ligands, they should not have the same chain ID as the "
958 "adjacent protein chain since it's a separate molecule.",
959 startResidueString.c_str(), endResidueString.c_str(), restype0.c_str(),
960 residueString.c_str(), restype.c_str());
965 void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
968 std::optional<std::string> startrestype;
973 int startWarnings = 0;
977 // Check that all residues have the same chain identifier, and if it is
978 // non-blank we also require the residue types to match.
979 checkResidueTypeSanity(pdba, r0, r1, rt);
981 // If we return correctly from checkResidueTypeSanity(), the only
982 // remaining cases where we can have non-matching residue types is if
983 // the chain ID was blank, which could be the case e.g. for a structure
984 // read from a GRO file or other file types without chain information.
985 // In that case we need to be a bit more liberal and detect chains based
986 // on the residue type.
988 // If we get here, the chain ID must be identical for all residues
989 char chainID = pdba->resinfo[r0].chainid;
991 /* Find the starting terminus (typially N or 5') */
992 for (i = r0; i < r1 && *r_start == -1; i++)
994 startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
999 if (gmx::equalCaseInsensitive(*startrestype, "Protein")
1000 || gmx::equalCaseInsensitive(*startrestype, "DNA")
1001 || gmx::equalCaseInsensitive(*startrestype, "RNA"))
1003 GMX_LOG(logger.info)
1005 .appendTextFormatted("Identified residue %s%d as a starting terminus.",
1006 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1009 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1013 GMX_LOG(logger.info)
1015 .appendTextFormatted(
1016 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1018 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1022 GMX_LOG(logger.info)
1024 .appendTextFormatted("Disabling further notes about ions.");
1030 // Either no known residue type, or one not needing special handling
1031 if (startWarnings < 5)
1035 GMX_LOG(logger.warning)
1037 .appendTextFormatted(
1038 "Starting residue %s%d in chain not identified as "
1040 "This chain lacks identifiers, which makes it impossible to do "
1042 "classification of the start/end residues. Here we need to "
1043 "guess this residue "
1044 "should not be part of the chain and instead introduce a "
1045 "break, but that will "
1046 "be catastrophic if they should in fact be linked. Please "
1047 "check your structure, "
1048 "and add %s to residuetypes.dat if this was not correct.",
1049 *pdba->resinfo[i].name, pdba->resinfo[i].nr, *pdba->resinfo[i].name);
1053 GMX_LOG(logger.warning)
1055 .appendTextFormatted(
1056 "No residues in chain starting at %s%d identified as "
1058 "This makes it impossible to link them into a molecule, which "
1060 "correct or a catastrophic error. Please check your structure, "
1062 "necessary residue names to residuetypes.dat if this was not "
1064 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1067 if (startWarnings == 4)
1069 GMX_LOG(logger.warning)
1071 .appendTextFormatted(
1072 "Disabling further warnings about unidentified residues at start "
1081 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1082 for (int i = *r_start; i < r1; i++)
1084 std::optional<std::string> restype =
1085 rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1090 if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
1094 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1098 GMX_LOG(logger.info)
1100 .appendTextFormatted(
1101 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1103 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1107 GMX_LOG(logger.info)
1109 .appendTextFormatted("Disabling further notes about ions.");
1115 // Either no known residue type, or one not needing special handling.
1116 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1117 // Otherwise the call to checkResidueTypeSanity() will
1118 // have caught the problem.
1119 if (endWarnings < 5)
1121 GMX_LOG(logger.warning)
1123 .appendTextFormatted(
1124 "Residue %s%d in chain has different type ('%s') from "
1125 "residue %s%d ('%s'). This chain lacks identifiers, which "
1127 "it impossible to do strict classification of the start/end "
1128 "residues. Here we "
1129 "need to guess this residue should not be part of the chain "
1131 "introduce a break, but that will be catastrophic if they "
1132 "should in fact be "
1133 "linked. Please check your structure, and add %s to "
1135 "if this was not correct.",
1136 *pdba->resinfo[i].name, pdba->resinfo[i].nr, restype->c_str(),
1137 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr,
1138 startrestype->c_str(), *pdba->resinfo[i].name);
1140 if (endWarnings == 4)
1142 GMX_LOG(logger.warning)
1144 .appendTextFormatted(
1145 "Disabling further warnings about unidentified residues at end "
1155 GMX_LOG(logger.info)
1157 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1158 *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
1162 enum class ChainSeparationType : int
1171 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1172 { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1174 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1175 { "Splitting chemical chains based on TER records or chain id changing.\n",
1176 "Splitting chemical chains based on TER records and chain id changing.\n",
1177 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1178 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1179 "Splitting chemical chains interactively.\n" }
1182 void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1185 char old_prev_chainid;
1186 char old_this_chainid;
1187 int old_prev_chainnum;
1188 int old_this_chainnum;
1190 char select[STRLEN];
1194 const char* prev_atomname;
1195 const char* this_atomname;
1196 const char* prev_resname;
1197 const char* this_resname;
1203 /* The default chain enumeration is based on TER records only */
1204 printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1206 old_prev_chainid = '?';
1207 old_prev_chainnum = -1;
1210 this_atomname = nullptr;
1212 this_resname = nullptr;
1216 for (i = 0; i < pdba->nres; i++)
1218 ri = &pdba->resinfo[i];
1219 old_this_chainid = ri->chainid;
1220 old_this_chainnum = ri->chainnum;
1222 prev_atomname = this_atomname;
1223 prev_atomnum = this_atomnum;
1224 prev_resname = this_resname;
1225 prev_resnum = this_resnum;
1226 prev_chainid = this_chainid;
1228 this_atomname = *(pdba->atomname[i]);
1229 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1230 this_resname = *ri->name;
1231 this_resnum = ri->nr;
1232 this_chainid = ri->chainid;
1234 switch (chainSeparation)
1236 case ChainSeparationType::IdOrTer:
1237 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1243 case ChainSeparationType::IdAndTer:
1244 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1250 case ChainSeparationType::Id:
1251 if (old_this_chainid != old_prev_chainid)
1257 case ChainSeparationType::Ter:
1258 if (old_this_chainnum != old_prev_chainnum)
1263 case ChainSeparationType::Interactive:
1264 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1268 GMX_LOG(logger.info)
1270 .appendTextFormatted(
1271 "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1273 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1274 prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1275 prev_atomname, this_resname, this_resnum, this_chainid,
1276 this_atomnum, this_atomname);
1278 if (nullptr == fgets(select, STRLEN - 1, stdin))
1280 gmx_fatal(FARGS, "Error reading from stdin");
1283 if (i == 0 || select[0] == 'y')
1289 case ChainSeparationType::Count:
1290 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1292 old_prev_chainid = old_this_chainid;
1293 old_prev_chainnum = old_this_chainnum;
1295 ri->chainnum = new_chainnum;
1299 bool checkChainCyclicity(t_atoms* pdba,
1303 gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1304 gmx::ArrayRef<const RtpRename> rr,
1305 real long_bond_dist_,
1306 real short_bond_dist_)
1308 // if start and end are the same, we can't have a cycle
1309 if (start_ter == end_ter)
1313 int ai = -1, aj = -1;
1314 char* rtpname = *(pdba->resinfo[start_ter].rtp);
1315 std::string newName = search_resrename(rr, rtpname, false, false, false);
1316 if (newName.empty())
1320 auto res = getDatabaseEntry(newName, rtpFFDB);
1321 const char *name_ai, *name_aj;
1323 for (const auto& patch : res->rb[ebtsBONDS].b)
1324 { /* Search backward bond for n/5' terminus */
1325 name_ai = patch.ai().c_str();
1326 name_aj = patch.aj().c_str();
1327 if (name_ai[0] == '-')
1329 aj = search_res_atom(++name_ai, end_ter, pdba, "check", TRUE);
1330 ai = search_res_atom(name_aj, start_ter, pdba, "check", TRUE);
1332 else if (name_aj[0] == '-')
1334 aj = search_res_atom(++name_aj, end_ter, pdba, "check", TRUE);
1335 ai = search_res_atom(name_ai, start_ter, pdba, "check", TRUE);
1337 if (ai >= 0 && aj >= 0)
1343 if (!(ai >= 0 && aj >= 0))
1345 rtpname = *(pdba->resinfo[end_ter].rtp);
1346 newName = search_resrename(rr, rtpname, false, false, false);
1347 if (newName.empty())
1351 res = getDatabaseEntry(newName, rtpFFDB);
1352 for (const auto& patch : res->rb[ebtsBONDS].b)
1354 /* Seach forward bond for c/3' terminus */
1355 name_ai = patch.ai().c_str();
1356 name_aj = patch.aj().c_str();
1358 if (name_ai[0] == '+')
1360 ai = search_res_atom(name_aj, end_ter, pdba, "check", TRUE);
1361 aj = search_res_atom(++name_ai, start_ter, pdba, "check", TRUE);
1363 else if (name_aj[0] == '+')
1365 ai = search_res_atom(name_ai, end_ter, pdba, "check", TRUE);
1366 aj = search_res_atom(++name_aj, start_ter, pdba, "check", TRUE);
1368 if (ai >= 0 && aj >= 0)
1375 if (ai >= 0 && aj >= 0)
1377 real dist = distance2(pdbx[ai], pdbx[aj]);
1378 /* it is better to read bond length from ffbonded.itp */
1379 return (dist < gmx::square(long_bond_dist_) && dist > gmx::square(short_bond_dist_));
1390 char chainnum = ' ';
1393 bool bAllWat = false;
1395 std::vector<int> chainstart;
1402 bool bAllWat = false;
1404 std::vector<int> chainstart;
1405 std::vector<MoleculePatchDatabase*> ntdb;
1406 std::vector<MoleculePatchDatabase*> ctdb;
1407 std::vector<int> r_start;
1408 std::vector<int> r_end;
1410 std::vector<gmx::RVec> x;
1411 std::vector<int> cyclicBondsIndex;
1414 enum class VSitesType : int
1421 const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = { { "none", "hydrogens",
1424 enum class WaterType : int
1436 const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1437 { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1440 enum class MergeType : int
1447 const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = { { "no", "all",
1458 class pdb2gmx : public ICommandLineOptionsModule
1464 bVsiteAromatics_(FALSE),
1465 chainSeparation_(ChainSeparationType::IdOrTer),
1466 vsitesType_(VSitesType::None),
1467 waterType_(WaterType::Select),
1468 mergeType_(MergeType::No),
1472 gmx::LoggerBuilder builder;
1473 builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1474 builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1475 loggerOwner_ = std::make_unique<LoggerOwner>(builder.build());
1478 // From ICommandLineOptionsModule
1479 void init(CommandLineModuleSettings* /*settings*/) override {}
1481 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1483 void optionsFinished() override;
1501 bool bAllowMissing_;
1505 bool bChargeGroups_;
1515 bool bVsiteAromatics_;
1519 real long_bond_dist_;
1520 real short_bond_dist_;
1522 std::string indexOutputFile_;
1523 std::string outputFile_;
1524 std::string topologyFile_;
1525 std::string includeTopologyFile_;
1526 std::string outputConfFile_;
1527 std::string inputConfFile_;
1528 std::string outFile_;
1531 ChainSeparationType chainSeparation_;
1532 VSitesType vsitesType_;
1533 WaterType waterType_;
1534 MergeType mergeType_;
1537 char forcefield_[STRLEN];
1538 char ffdir_[STRLEN];
1541 std::vector<std::string> incls_;
1542 std::vector<t_mols> mols_;
1544 std::unique_ptr<LoggerOwner> loggerOwner_;
1547 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1549 const char* desc[] = {
1550 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1551 "some database files, adds hydrogens to the molecules and generates",
1552 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1553 "and a topology in GROMACS format.",
1554 "These files can subsequently be processed to generate a run input file.",
1556 "[THISMODULE] will search for force fields by looking for",
1557 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1558 "of the current working directory and of the GROMACS library directory",
1559 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1561 "By default the forcefield selection is interactive,",
1562 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1563 "in the list on the command line instead. In that case [THISMODULE] just looks",
1564 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1566 "After choosing a force field, all files will be read only from",
1567 "the corresponding force field directory.",
1568 "If you want to modify or add a residue types, you can copy the force",
1569 "field directory from the GROMACS library directory to your current",
1570 "working directory. If you want to add new protein residue types,",
1571 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1572 "or copy the whole library directory to a local directory and set",
1573 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1574 "Check Chapter 5 of the manual for more information about file formats.",
1577 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1578 "need not necessarily contain a protein structure. Every kind of",
1579 "molecule for which there is support in the database can be converted.",
1580 "If there is no support in the database, you can add it yourself.[PAR]",
1582 "The program has limited intelligence, it reads a number of database",
1583 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1584 "if necessary this can be done manually. The program can prompt the",
1585 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1586 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1587 "protonated (three protons, default), for Asp and Glu unprotonated",
1588 "(default) or protonated, for His the proton can be either on ND1,",
1589 "on NE2 or on both. By default these selections are done automatically.",
1590 "For His, this is based on an optimal hydrogen bonding",
1591 "conformation. Hydrogen bonds are defined based on a simple geometric",
1592 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1593 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1594 "[TT]-dist[tt] respectively.[PAR]",
1596 "The protonation state of N- and C-termini can be chosen interactively",
1597 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1598 "respectively. Some force fields support zwitterionic forms for chains of",
1599 "one residue, but for polypeptides these options should NOT be selected.",
1600 "The AMBER force fields have unique forms for the terminal residues,",
1601 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1602 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1603 "respectively to use these forms, making sure you preserve the format",
1604 "of the coordinate file. Alternatively, use named terminating residues",
1605 "(e.g. ACE, NME).[PAR]",
1607 "The separation of chains is not entirely trivial since the markup",
1608 "in user-generated PDB files frequently varies and sometimes it",
1609 "is desirable to merge entries across a TER record, for instance",
1610 "if you want a disulfide bridge or distance restraints between",
1611 "two protein chains or if you have a HEME group bound to a protein.",
1612 "In such cases multiple chains should be contained in a single",
1613 "[TT]moleculetype[tt] definition.",
1614 "To handle this, [THISMODULE] uses two separate options.",
1615 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1616 "start, and termini added when applicable. This can be done based on the",
1617 "existence of TER records, when the chain id changes, or combinations of either",
1618 "or both of these. You can also do the selection fully interactively.",
1619 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1620 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1621 "This can be turned off (no merging), all non-water chains can be merged into a",
1622 "single molecule, or the selection can be done interactively.[PAR]",
1624 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1625 "If any of the occupancies are not one, indicating that the atom is",
1626 "not resolved well in the structure, a warning message is issued.",
1627 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1628 "all occupancy fields may be zero. Either way, it is up to the user",
1629 "to verify the correctness of the input data (read the article!).[PAR]",
1631 "During processing the atoms will be reordered according to GROMACS",
1632 "conventions. With [TT]-n[tt] an index file can be generated that",
1633 "contains one group reordered in the same way. This allows you to",
1634 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1635 "one limitation: reordering is done after the hydrogens are stripped",
1636 "from the input and before new hydrogens are added. This means that",
1637 "you should not use [TT]-ignh[tt].[PAR]",
1639 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1640 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1641 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1644 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1645 "motions. Angular and out-of-plane motions can be removed by changing",
1646 "hydrogens into virtual sites and fixing angles, which fixes their",
1647 "position relative to neighboring atoms. Additionally, all atoms in the",
1648 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1649 "can be converted into virtual sites, eliminating the fast improper dihedral",
1650 "fluctuations in these rings (but this feature is deprecated).",
1651 "[BB]Note[bb] that in this case all other hydrogen",
1652 "atoms are also converted to virtual sites. The mass of all atoms that are",
1653 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1654 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1655 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1656 "done for water hydrogens to slow down the rotational motion of water.",
1657 "The increase in mass of the hydrogens is subtracted from the bonded",
1658 "(heavy) atom so that the total mass of the system remains the same.",
1660 "As a special case, ring-closed (or cyclic) molecules are considered.",
1661 "[THISMODULE] automatically determines if a cyclic molecule is present",
1662 "by evaluating the distance between the terminal atoms of a given chain.",
1663 "If this distance is greater than the [TT]-sb[tt]",
1664 "(\"Short bond warning distance\", default 0.05 nm)",
1665 "and less than the [TT]-lb[tt] (\"Long bond warning distance\", default 0.25 nm)",
1666 "the molecule is considered to be ring closed and will be processed as such.",
1667 "Please note that this does not detect cyclic bonds over periodic boundaries."
1670 settings->setHelpText(desc);
1672 options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1673 "Write the residue database in new format to [TT]new.rtp[tt]"));
1675 RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1677 RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1678 options->addOption(EnumOption<ChainSeparationType>("chainsep")
1679 .enumValue(c_chainSeparationTypeNames)
1680 .store(&chainSeparation_)
1681 .description("Condition in PDB files when a new chain should be "
1682 "started (adding termini)"));
1683 options->addOption(EnumOption<MergeType>("merge")
1684 .enumValue(c_mergeTypeNames)
1686 .description("Merge multiple chains into a single [moleculetype]"));
1687 options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1688 "Force field, interactive by default. Use [TT]-h[tt] for information."));
1690 EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1691 options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1692 "Set the next 8 options to interactive"));
1693 options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1694 "Interactive SS bridge selection"));
1695 options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1696 "Interactive termini selection, instead of charged (default)"));
1697 options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1698 "Interactive lysine selection, instead of charged"));
1699 options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1700 "Interactive arginine selection, instead of charged"));
1701 options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1702 "Interactive aspartic acid selection, instead of charged"));
1703 options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1704 "Interactive glutamic acid selection, instead of charged"));
1705 options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1706 "Interactive glutamine selection, instead of charged"));
1707 options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1708 "Interactive histidine selection, instead of checking H-bonds"));
1709 options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1710 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1712 RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1713 options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1714 "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1716 options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1717 "Sort the residues according to database, turning this off is dangerous as charge "
1718 "groups might be broken in parts"));
1720 BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1722 BooleanOption("missing")
1723 .store(&bAllowMissing_)
1724 .defaultValue(false)
1726 "Continue when atoms are missing and bonds cannot be made, dangerous"));
1728 BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1730 RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1731 options->addOption(EnumOption<VSitesType>("vsite")
1732 .store(&vsitesType_)
1733 .enumValue(c_vsitesTypeNames)
1734 .description("Convert atoms to virtual sites"));
1735 options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1736 "Make hydrogen atoms heavy"));
1738 BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1739 options->addOption(BooleanOption("chargegrp")
1740 .store(&bChargeGroups_)
1742 .description("Use charge groups in the [REF].rtp[ref] file"));
1743 options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1744 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1745 options->addOption(BooleanOption("renum")
1747 .defaultValue(false)
1748 .description("Renumber the residues consecutively in the output"));
1749 options->addOption(BooleanOption("rtpres")
1750 .store(&bRTPresname_)
1751 .defaultValue(false)
1752 .description("Use [REF].rtp[ref] entry names as residue names"));
1753 options->addOption(FileNameOption("f")
1756 .store(&inputConfFile_)
1758 .defaultBasename("protein")
1760 .description("Structure file"));
1761 options->addOption(FileNameOption("o")
1764 .store(&outputConfFile_)
1766 .defaultBasename("conf")
1767 .description("Structure file"));
1768 options->addOption(FileNameOption("p")
1771 .store(&topologyFile_)
1773 .defaultBasename("topol")
1774 .description("Topology file"));
1775 options->addOption(FileNameOption("i")
1778 .store(&includeTopologyFile_)
1780 .defaultBasename("posre")
1781 .description("Include file for topology"));
1782 options->addOption(FileNameOption("n")
1785 .store(&indexOutputFile_)
1786 .storeIsSet(&bIndexSet_)
1787 .defaultBasename("index")
1788 .description("Index file"));
1789 options->addOption(FileNameOption("q")
1793 .storeIsSet(&bOutputSet_)
1794 .defaultBasename("clean")
1796 .description("Structure file"));
1799 void pdb2gmx::optionsFinished()
1801 if (inputConfFile_.empty())
1803 GMX_THROW(InconsistentInputError("You must supply an input file"));
1807 /* if anything changes here, also change description of -inter */
1822 else if (bDeuterate_)
1831 /* Force field selection, interactive or direct */
1832 choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(), forcefield_,
1833 sizeof(forcefield_), ffdir_, sizeof(ffdir_), loggerOwner_->logger());
1835 if (strlen(forcefield_) > 0)
1837 ffname_ = forcefield_;
1838 ffname_[0] = std::toupper(ffname_[0]);
1842 gmx_fatal(FARGS, "Empty forcefield string");
1848 char select[STRLEN];
1849 std::vector<DisulfideBond> ssbonds;
1853 const char* prev_atomname;
1854 const char* this_atomname;
1855 const char* prev_resname;
1856 const char* this_resname;
1861 int prev_chainnumber;
1862 int this_chainnumber;
1863 int this_chainstart;
1864 int prev_chainstart;
1866 const gmx::MDLogger& logger = loggerOwner_->logger();
1868 GMX_LOG(logger.info)
1870 .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1872 choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
1874 switch (vsitesType_)
1876 case VSitesType::None:
1878 bVsiteAromatics_ = false;
1880 case VSitesType::Hydrogens:
1882 bVsiteAromatics_ = false;
1884 case VSitesType::Aromatics:
1886 bVsiteAromatics_ = true;
1888 case VSitesType::Count:
1889 gmx_fatal(FARGS, "Internal inconsistency: VSitesType is out of range.");
1892 /* Open the symbol table */
1894 open_symtab(&symtab);
1896 /* Residue type database */
1899 /* Read residue renaming database(s), if present */
1900 std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
1902 std::vector<RtpRename> rtprename;
1903 for (const auto& filename : rrn)
1905 GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
1906 FILE* fp = fflib_open(filename);
1907 read_rtprename(filename.c_str(), fp, &rtprename);
1911 /* Add all alternative names from the residue renaming database to the list
1912 of recognized amino/nucleic acids. */
1913 for (const auto& rename : rtprename)
1915 /* Only add names if the 'standard' gromacs/iupac base name was found */
1916 if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
1918 rt.addResidue(rename.main, *restype);
1919 rt.addResidue(rename.nter, *restype);
1920 rt.addResidue(rename.cter, *restype);
1921 rt.addResidue(rename.bter, *restype);
1928 if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
1932 else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
1942 char* title = nullptr;
1946 int natom = read_pdball(inputConfFile_.c_str(), bOutputSet_, outFile_.c_str(), &title, &pdba_all,
1947 &pdbx, &pbcType, box, bRemoveH_, &symtab, &rt, watres, &aps, bVerbose_);
1951 std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
1952 GMX_THROW(InconsistentInputError(message));
1955 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
1956 int nwaterchain = 0;
1958 modify_chain_numbers(&pdba_all, chainSeparation_, logger);
1960 int nchainmerges = 0;
1962 this_atomname = nullptr;
1964 this_resname = nullptr;
1967 this_chainnumber = -1;
1968 this_chainstart = 0;
1969 /* Keep the compiler happy */
1970 prev_chainstart = 0;
1973 std::vector<t_pdbchain> pdb_ch;
1976 bool bMerged = false;
1977 for (int i = 0; (i < natom); i++)
1979 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1981 /* TODO this should live in a helper object, and consolidate
1982 that with code in modify_chain_numbers */
1983 prev_atomname = this_atomname;
1984 prev_atomnum = this_atomnum;
1985 prev_resname = this_resname;
1986 prev_resnum = this_resnum;
1987 prev_chainid = this_chainid;
1988 prev_chainnumber = this_chainnumber;
1991 prev_chainstart = this_chainstart;
1994 this_atomname = *pdba_all.atomname[i];
1995 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
1996 this_resname = *ri->name;
1997 this_resnum = ri->nr;
1998 this_chainid = ri->chainid;
1999 this_chainnumber = ri->chainnum;
2001 bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
2003 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
2007 "Must have pdbinfo from reading a PDB file if chain number is changing");
2008 this_chainstart = pdba_all.atom[i].resind;
2010 if (i > 0 && !bWat_)
2012 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
2014 GMX_LOG(logger.info)
2016 .appendTextFormatted(
2017 "Merge chain ending with residue %s%d (chain id '%c', atom %d "
2018 "%s) and chain starting with "
2019 "residue %s%d (chain id '%c', atom %d %s) into a single "
2020 "moleculetype (keeping termini)? [n/y]",
2021 prev_resname, prev_resnum, prev_chainid, prev_atomnum,
2022 prev_atomname, this_resname, this_resnum, this_chainid,
2023 this_atomnum, this_atomname);
2025 if (nullptr == fgets(select, STRLEN - 1, stdin))
2027 gmx_fatal(FARGS, "Error reading from stdin");
2029 bMerged = (select[0] == 'y');
2031 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
2039 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
2040 pdba_all.atom[i].resind - prev_chainstart;
2041 pdb_ch[numChains - 1].nterpairs++;
2042 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
2047 /* set natom for previous chain */
2050 pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
2057 /* check if chain identifier was used before */
2058 for (int j = 0; (j < numChains); j++)
2060 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
2062 GMX_LOG(logger.warning)
2064 .appendTextFormatted(
2065 "Chain identifier '%c' is used in two non-sequential "
2067 "They will be treated as separate chains unless you "
2068 "reorder your file.",
2072 t_pdbchain newChain;
2073 newChain.chainid = ri->chainid;
2074 newChain.chainnum = ri->chainnum;
2076 newChain.bAllWat = bWat_;
2079 newChain.nterpairs = 0;
2083 newChain.nterpairs = 1;
2085 newChain.chainstart.resize(newChain.nterpairs + 1);
2086 /* modified [numChains] to [0] below */
2087 newChain.chainstart[0] = 0;
2088 pdb_ch.push_back(newChain);
2094 pdb_ch.back().natom = natom - pdb_ch.back().start;
2096 /* set all the water blocks at the end of the chain */
2097 std::vector<int> swap_index(numChains);
2099 for (int i = 0; i < numChains; i++)
2101 if (!pdb_ch[i].bAllWat)
2107 for (int i = 0; i < numChains; i++)
2109 if (pdb_ch[i].bAllWat)
2115 if (nwaterchain > 1)
2117 GMX_LOG(logger.info)
2119 .appendTextFormatted("Moved all the water blocks to the end");
2123 std::vector<t_chain> chains(numChains);
2124 /* copy pdb data and x for all chains */
2125 for (int i = 0; (i < numChains); i++)
2127 int si = swap_index[i];
2128 chains[i].chainid = pdb_ch[si].chainid;
2129 chains[i].chainnum = pdb_ch[si].chainnum;
2130 chains[i].bAllWat = pdb_ch[si].bAllWat;
2131 chains[i].nterpairs = pdb_ch[si].nterpairs;
2132 chains[i].chainstart = pdb_ch[si].chainstart;
2133 chains[i].ntdb.clear();
2134 chains[i].ctdb.clear();
2135 chains[i].r_start.resize(pdb_ch[si].nterpairs);
2136 chains[i].r_end.resize(pdb_ch[si].nterpairs);
2138 snew(chains[i].pdba, 1);
2139 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2140 for (j = 0; j < chains[i].pdba->nr; j++)
2142 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start + j];
2143 chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2144 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2145 chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2147 /* Re-index the residues assuming that the indices are continuous */
2148 int k = chains[i].pdba->atom[0].resind;
2149 int nres = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2150 chains[i].pdba->nres = nres;
2151 for (int j = 0; j < chains[i].pdba->nr; j++)
2153 chains[i].pdba->atom[j].resind -= k;
2155 srenew(chains[i].pdba->resinfo, nres);
2156 for (int j = 0; j < nres; j++)
2158 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k + j];
2159 chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2160 /* make all chain identifiers equal to that of the chain */
2161 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2165 if (nchainmerges > 0)
2167 GMX_LOG(logger.info)
2169 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2173 GMX_LOG(logger.info)
2175 .appendTextFormatted(
2176 "There are %d chains and %d blocks of water and "
2177 "%d residues with %d atoms",
2178 numChains - nwaterchain, nwaterchain, pdba_all.nres, natom);
2180 GMX_LOG(logger.info)
2182 .appendTextFormatted(" %5s %4s %6s", "chain", "#res", "#atoms");
2183 for (int i = 0; (i < numChains); i++)
2185 GMX_LOG(logger.info)
2187 .appendTextFormatted(" %d '%c' %5d %6d %s\n", i + 1,
2188 chains[i].chainid ? chains[i].chainid : '-', chains[i].pdba->nres,
2189 chains[i].pdba->nr, chains[i].bAllWat ? "(only water)" : "");
2192 check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2194 /* Read atomtypes... */
2195 PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2197 /* read residue database */
2198 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2199 std::vector<std::string> rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2200 std::vector<PreprocessResidue> rtpFFDB;
2201 for (const auto& filename : rtpf)
2203 readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2207 /* Not correct with multiple rtp input files with different bonded types */
2208 FILE* fp = gmx_fio_fopen("new.rtp", "w");
2209 print_resall(fp, rtpFFDB, atype);
2213 /* read hydrogen database */
2214 std::vector<MoleculePatchDatabase> ah;
2215 read_h_db(ffdir_, &ah);
2217 /* Read Termini database... */
2218 std::vector<MoleculePatchDatabase> ntdb;
2219 std::vector<MoleculePatchDatabase> ctdb;
2220 std::vector<MoleculePatchDatabase*> tdblist;
2221 int nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2222 int nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2224 FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2226 print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2229 std::vector<gmx::RVec> x;
2230 /* new pdb datastructure for sorting. */
2231 t_atoms** sortAtoms = nullptr;
2232 t_atoms** localAtoms = nullptr;
2233 snew(sortAtoms, numChains);
2234 snew(localAtoms, numChains);
2235 for (int chain = 0; (chain < numChains); chain++)
2237 cc = &(chains[chain]);
2239 /* set pdba, natom and nres to the current chain */
2242 natom = cc->pdba->nr;
2243 int nres = cc->pdba->nres;
2245 if (cc->chainid && (cc->chainid != ' '))
2247 GMX_LOG(logger.info)
2249 .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2250 chain + 1, cc->chainid, natom, nres);
2254 GMX_LOG(logger.info)
2256 .appendTextFormatted("Processing chain %d (%d atoms, %d residues)", chain + 1,
2260 process_chain(logger, pdba, x, bUnA_, bUnA_, bUnA_, bLysMan_, bAspMan_, bGluMan_, bHisMan_,
2261 bArgMan_, bGlnMan_, angle_, distance_, &symtab, rtprename);
2263 cc->chainstart[cc->nterpairs] = pdba->nres;
2265 for (int i = 0; i < cc->nterpairs; i++)
2267 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]),
2268 &(cc->r_end[j]), &rt, logger);
2269 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2271 if (checkChainCyclicity(pdba, pdbx, cc->r_start[j], cc->r_end[j], rtpFFDB,
2272 rtprename, long_bond_dist_, short_bond_dist_))
2274 cc->cyclicBondsIndex.push_back(cc->r_start[j]);
2275 cc->cyclicBondsIndex.push_back(cc->r_end[j]);
2276 cc->r_start[j] = -1;
2286 if (cc->nterpairs == 0 && cc->cyclicBondsIndex.empty())
2288 GMX_LOG(logger.info)
2290 .appendTextFormatted(
2291 "Problem with chain definition, or missing terminal residues. "
2292 "This chain does not appear to contain a recognized chain molecule. "
2293 "If this is incorrect, you can edit residuetypes.dat to modify the "
2297 /* Check for disulfides and other special bonds */
2298 ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2300 if (!rtprename.empty())
2302 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2305 for (int i = 0; i < cc->nterpairs; i++)
2308 * We first apply a filter so we only have the
2309 * termini that can be applied to the residue in question
2310 * (or a generic terminus if no-residue specific is available).
2312 /* First the N terminus */
2315 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2316 if (tdblist.empty())
2318 GMX_LOG(logger.info)
2320 .appendTextFormatted(
2321 "No suitable end (N or 5') terminus found in database - "
2322 "assuming this residue "
2323 "is already in a terminus-specific form and skipping terminus "
2325 cc->ntdb.push_back(nullptr);
2329 if (bTerMan_ && !tdblist.empty())
2331 sprintf(select, "Select start terminus type for %s-%d",
2332 *pdba->resinfo[cc->r_start[i]].name, pdba->resinfo[cc->r_start[i]].nr);
2333 cc->ntdb.push_back(choose_ter(tdblist, select));
2337 cc->ntdb.push_back(tdblist[0]);
2340 printf("Start terminus %s-%d: %s\n", *pdba->resinfo[cc->r_start[i]].name,
2341 pdba->resinfo[cc->r_start[i]].nr, (cc->ntdb[i])->name.c_str());
2347 cc->ntdb.push_back(nullptr);
2350 /* And the C terminus */
2353 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2354 if (tdblist.empty())
2356 GMX_LOG(logger.info)
2358 .appendTextFormatted(
2359 "No suitable end (C or 3') terminus found in database - "
2360 "assuming this residue"
2361 "is already in a terminus-specific form and skipping terminus "
2363 cc->ctdb.push_back(nullptr);
2367 if (bTerMan_ && !tdblist.empty())
2369 sprintf(select, "Select end terminus type for %s-%d",
2370 *pdba->resinfo[cc->r_end[i]].name, pdba->resinfo[cc->r_end[i]].nr);
2371 cc->ctdb.push_back(choose_ter(tdblist, select));
2375 cc->ctdb.push_back(tdblist[0]);
2377 printf("End terminus %s-%d: %s\n", *pdba->resinfo[cc->r_end[i]].name,
2378 pdba->resinfo[cc->r_end[i]].nr, (cc->ctdb[i])->name.c_str());
2384 cc->ctdb.push_back(nullptr);
2387 std::vector<MoleculePatchDatabase> hb_chain;
2388 /* lookup hackblocks and rtp for all residues */
2389 std::vector<PreprocessResidue> restp_chain;
2390 get_hackblocks_rtp(&hb_chain, &restp_chain, rtpFFDB, pdba->nres, pdba->resinfo, cc->nterpairs,
2391 &symtab, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing_, logger);
2392 /* ideally, now we would not need the rtp itself anymore, but do
2393 everything using the hb and restp arrays. Unfortunately, that
2394 requires some re-thinking of code in gen_vsite.c, which I won't
2395 do now :( AF 26-7-99 */
2397 rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2399 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2404 t_blocka* block = new_blocka();
2406 sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2407 remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2412 GMX_LOG(logger.warning)
2414 .appendTextFormatted(
2415 "With the -remh option the generated "
2416 "index file (%s) might be useless "
2417 "(the index file is generated before hydrogens are added)",
2418 indexOutputFile_.c_str());
2420 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2422 for (int i = 0; i < block->nr; i++)
2432 GMX_LOG(logger.warning)
2434 .appendTextFormatted(
2435 "Without sorting no check for duplicate atoms can be done");
2438 /* Generate Hydrogen atoms (and termini) in the sequence */
2439 GMX_LOG(logger.info)
2441 .appendTextFormatted(
2442 "Generating any missing hydrogen atoms and/or adding termini.");
2443 add_h(&pdba, &localAtoms[chain], &x, ah, &symtab, cc->nterpairs, cc->ntdb, cc->ctdb,
2444 cc->r_start, cc->r_end, bAllowMissing_, cc->cyclicBondsIndex);
2445 GMX_LOG(logger.info)
2447 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2449 /* make up molecule name(s) */
2451 int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2453 std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2455 std::string molname;
2463 this_chainid = cc->chainid;
2465 /* Add the chain id if we have one */
2466 if (this_chainid != ' ')
2468 suffix.append(formatString("_chain_%c", this_chainid));
2471 /* Check if there have been previous chains with the same id */
2473 for (int k = 0; k < chain; k++)
2475 if (cc->chainid == chains[k].chainid)
2480 /* Add the number for this chain identifier if there are multiple copies */
2483 suffix.append(formatString("%d", nid_used + 1));
2486 if (suffix.length() > 0)
2488 molname.append(restype);
2489 molname.append(suffix);
2496 std::string itp_fn = topologyFile_;
2498 std::string posre_fn = includeTopologyFile_;
2499 if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2502 printf("Chain time...\n");
2503 // construct the itp file name
2504 itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2506 itp_fn.append(molname);
2507 itp_fn.append(".itp");
2508 // now do the same for posre
2509 posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2510 posre_fn.append("_");
2511 posre_fn.append(molname);
2512 posre_fn.append(".itp");
2513 if (posre_fn == itp_fn)
2515 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2517 incls_.emplace_back();
2518 incls_.back() = itp_fn;
2519 itp_file_ = gmx_fio_fopen(itp_fn.c_str(), "w");
2526 mols_.emplace_back();
2529 mols_.back().name = "SOL";
2530 mols_.back().nr = pdba->nres;
2534 mols_.back().name = molname;
2535 mols_.back().nr = 1;
2540 print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2546 top_file2 = nullptr;
2550 top_file2 = itp_file_;
2554 top_file2 = top_file;
2557 pdb2top(top_file2, posre_fn.c_str(), molname.c_str(), pdba, &x, &atype, &symtab, rtpFFDB,
2558 restp_chain, hb_chain, bAllowMissing_, bVsites_, bVsiteAromatics_, ffdir_, mHmult_,
2559 ssbonds, long_bond_dist_, short_bond_dist_, bDeuterate_, bChargeGroups_, bCmap_,
2560 bRenumRes_, bRTPresname_, cc->cyclicBondsIndex, logger);
2564 write_posres(posre_fn.c_str(), pdba, posre_fc_);
2569 gmx_fio_fclose(itp_file_);
2572 /* pdba and natom have been reassigned somewhere so: */
2577 if (watermodel_ == nullptr)
2579 for (int chain = 0; chain < numChains; chain++)
2581 if (chains[chain].bAllWat)
2583 auto message = formatString(
2584 "You have chosen not to include a water model, "
2585 "but there is water in the input file. Select a "
2586 "water model or remove the water from your input file.");
2587 GMX_THROW(InconsistentInputError(message));
2593 std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2594 if (!fflib_fexist(waterFile))
2596 auto message = formatString(
2597 "The topology file '%s' for the selected water "
2598 "model '%s' can not be found in the force field "
2599 "directory. Select a different water model.",
2600 waterFile.c_str(), watermodel_);
2601 GMX_THROW(InconsistentInputError(message));
2605 print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2606 gmx_fio_fclose(top_file);
2608 /* now merge all chains back together */
2611 for (int i = 0; (i < numChains); i++)
2613 natom += chains[i].pdba->nr;
2614 nres += chains[i].pdba->nres;
2618 init_t_atoms(atoms, natom, false);
2619 for (int i = 0; i < atoms->nres; i++)
2621 sfree(atoms->resinfo[i].name);
2624 srenew(atoms->resinfo, nres);
2628 for (int i = 0; (i < numChains); i++)
2632 GMX_LOG(logger.info)
2634 .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2635 i + 1, chains[i].pdba->nr, chains[i].pdba->nres);
2637 for (int j = 0; (j < chains[i].pdba->nr); j++)
2639 atoms->atom[k] = chains[i].pdba->atom[j];
2640 atoms->atom[k].resind += l; /* l is processed nr of residues */
2641 atoms->atomname[k] = chains[i].pdba->atomname[j];
2642 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2643 x.push_back(chains[i].x[j]);
2646 for (int j = 0; (j < chains[i].pdba->nres); j++)
2648 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2651 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2655 done_atom(chains[i].pdba);
2660 GMX_LOG(logger.info)
2662 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2663 print_sums(atoms, true, logger);
2667 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2668 clear_rvec(box_space);
2671 make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2673 write_sto_conf(outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr,
2676 done_symtab(&symtab);
2677 done_atom(&pdba_all);
2679 for (int chain = 0; chain < numChains; chain++)
2681 sfree(sortAtoms[chain]);
2682 sfree(localAtoms[chain]);
2690 GMX_LOG(logger.info)
2692 .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2693 GMX_LOG(logger.info)
2695 .appendTextFormatted("You have successfully generated a topology from: %s.",
2696 inputConfFile_.c_str());
2697 if (watermodel_ != nullptr)
2699 GMX_LOG(logger.info)
2701 .appendTextFormatted("The %s force field and the %s water model are used.", ffname_,
2707 GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2709 GMX_LOG(logger.info)
2711 .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2718 const char pdb2gmxInfo::name[] = "pdb2gmx";
2719 const char pdb2gmxInfo::shortDescription[] =
2720 "Convert coordinate files to topology and FF-compliant coordinate files";
2721 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2723 return std::make_unique<pdb2gmx>();