2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
52 #include "gromacs/commandline/cmdlineoptionsmodule.h"
53 #include "gromacs/fileio/confio.h"
54 #include "gromacs/fileio/filetypes.h"
55 #include "gromacs/fileio/gmxfio.h"
56 #include "gromacs/fileio/pdbio.h"
57 #include "gromacs/gmxlib/conformation_utilities.h"
58 #include "gromacs/gmxpreprocess/fflibutil.h"
59 #include "gromacs/gmxpreprocess/genhydro.h"
60 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
61 #include "gromacs/gmxpreprocess/grompp_impl.h"
62 #include "gromacs/gmxpreprocess/h_db.h"
63 #include "gromacs/gmxpreprocess/hizzie.h"
64 #include "gromacs/gmxpreprocess/pdb2top.h"
65 #include "gromacs/gmxpreprocess/pgutil.h"
66 #include "gromacs/gmxpreprocess/specbond.h"
67 #include "gromacs/gmxpreprocess/ter_db.h"
68 #include "gromacs/gmxpreprocess/toputil.h"
69 #include "gromacs/gmxpreprocess/xlate.h"
70 #include "gromacs/math/vec.h"
71 #include "gromacs/options/basicoptions.h"
72 #include "gromacs/options/filenameoption.h"
73 #include "gromacs/options/ioptionscontainer.h"
74 #include "gromacs/topology/atomprop.h"
75 #include "gromacs/topology/block.h"
76 #include "gromacs/topology/index.h"
77 #include "gromacs/topology/residuetypes.h"
78 #include "gromacs/topology/symtab.h"
79 #include "gromacs/topology/topology.h"
80 #include "gromacs/utility/dir_separator.h"
81 #include "gromacs/utility/exceptions.h"
82 #include "gromacs/utility/fatalerror.h"
83 #include "gromacs/utility/filestream.h"
84 #include "gromacs/utility/loggerbuilder.h"
85 #include "gromacs/utility/path.h"
86 #include "gromacs/utility/smalloc.h"
87 #include "gromacs/utility/strdb.h"
88 #include "gromacs/utility/stringutil.h"
90 #include "hackblock.h"
95 RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
113 const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
115 /* NOTE: This function returns the main building block name,
116 * it does not take terminal renaming into account.
118 auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
119 return gmx::equalCaseInsensitive(name, rename.gmx);
121 return found != rr.end() ? found->main.c_str() : name.c_str();
124 const char* select_res(int nr,
129 gmx::ArrayRef<const RtpRename> rr)
131 printf("Which %s type do you want for residue %d\n", title, resnr + 1);
132 for (int sel = 0; (sel < nr); sel++)
134 printf("%d. %s (%s)\n", sel, expl[sel], res2bb_notermini(name[sel], rr));
136 printf("\nType a number:");
140 if (scanf("%d", &userSelection) != 1)
142 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
145 return name[userSelection];
148 const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
156 const char* lh[easpNR] = { "ASP", "ASPH" };
157 const char* expl[easpNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
159 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", rr);
162 const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
170 const char* lh[egluNR] = { "GLU", "GLUH" };
171 const char* expl[egluNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
173 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", rr);
176 const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
184 const char* lh[eglnNR] = { "GLN", "QLN" };
185 const char* expl[eglnNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
187 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", rr);
190 const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
198 const char* lh[elysNR] = { "LYSN", "LYS" };
199 const char* expl[elysNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
201 return select_res(elysNR, resnr, lh, expl, "LYSINE", rr);
204 const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
212 const char* lh[eargNR] = { "ARGN", "ARG" };
213 const char* expl[eargNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
215 return select_res(eargNR, resnr, lh, expl, "ARGININE", rr);
218 const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
220 const char* expl[ehisNR] = { "H on ND1 only", "H on NE2 only", "H on ND1 and NE2",
223 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", rr);
226 void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
228 char line[STRLEN], buf[STRLEN];
231 while (get_a_line(fp, line, STRLEN))
233 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
234 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
235 * Note that the buffer length has been increased to 7 to allow this,
236 * so we just need to make sure the strings have zero-length initially.
248 int nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
249 RtpRename newEntry(gmx, main, nter, cter, bter);
252 if (nc != 2 && nc != 5)
254 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d or %d",
262 "A line in residue renaming database '%s' has %d columns, while previous "
263 "lines have %d columns",
269 /* This file does not have special termini names, copy them from main */
270 newEntry.nter = newEntry.main;
271 newEntry.cter = newEntry.main;
272 newEntry.bter = newEntry.main;
274 rtprename->push_back(newEntry);
278 std::string search_resrename(gmx::ArrayRef<const RtpRename> rr, const char* name, bool bStart, bool bEnd, bool bCompareFFRTPname)
280 auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
281 return ((!bCompareFFRTPname && (name == rename.gmx))
282 || (bCompareFFRTPname && (name == rename.main)));
286 /* If found in the database, rename this residue's rtp building block,
287 * otherwise keep the old name.
289 if (found != rr.end())
293 newName = found->bter;
297 newName = found->nter;
301 newName = found->cter;
305 newName = found->main;
308 if (newName[0] == '-')
310 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name,
311 bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
312 : (bEnd ? " as an ending terminus" : ""));
319 void rename_resrtp(t_atoms* pdba,
321 gmx::ArrayRef<const int> r_start,
322 gmx::ArrayRef<const int> r_end,
323 gmx::ArrayRef<const RtpRename> rr,
326 const gmx::MDLogger& logger)
328 bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
330 for (int r = 0; r < pdba->nres; r++)
334 for (int j = 0; j < nterpairs; j++)
341 for (int j = 0; j < nterpairs; j++)
349 std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
351 if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
353 /* This is a terminal residue, but the residue name,
354 * currently stored in .rtp, is not a standard residue name,
355 * but probably a force field specific rtp name.
356 * Check if we need to rename it because it is terminal.
358 newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
361 if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
367 .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
368 pdba->resinfo[r].nr, *pdba->resinfo[r].name,
371 pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
376 void pdbres_to_gmxrtp(t_atoms* pdba)
380 for (i = 0; (i < pdba->nres); i++)
382 if (pdba->resinfo[i].rtp == nullptr)
384 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
389 void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
394 for (i = 0; (i < pdba->nres); i++)
396 resnm = *pdba->resinfo[i].name;
397 if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
398 || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
400 /* Rename the residue name (not the rtp name) */
401 pdba->resinfo[i].name = put_symtab(symtab, newnm);
406 void rename_bb(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
411 for (i = 0; (i < pdba->nres); i++)
413 /* We have not set the rtp name yes, use the residue name */
414 bbnm = *pdba->resinfo[i].name;
415 if ((bFullCompare && (gmx::equalCaseInsensitive(bbnm, oldnm)))
416 || (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
418 /* Change the rtp builing block name */
419 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
424 void rename_bbint(t_atoms* pdba,
426 const char* gettp(int, gmx::ArrayRef<const RtpRename>),
429 gmx::ArrayRef<const RtpRename> rr)
435 for (i = 0; i < pdba->nres; i++)
437 /* We have not set the rtp name yet, use the residue name */
438 bbnm = *pdba->resinfo[i].name;
439 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) || (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
442 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
447 void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
453 ftp = fn2ftp(filename);
454 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
456 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
460 for (i = 0; (i < atoms->nr); i++)
462 if (atoms->pdbinfo[i].occup != 1)
466 GMX_LOG(logger.warning)
468 .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
469 *atoms->resinfo[atoms->atom[i].resind].name,
470 atoms->resinfo[atoms->atom[i].resind].nr,
471 *atoms->atomname[i], atoms->pdbinfo[i].occup);
473 if (atoms->pdbinfo[i].occup == 0)
483 if (nzero == atoms->nr)
485 GMX_LOG(logger.warning)
487 .appendTextFormatted(
488 "All occupancy fields zero. This is probably not an X-Ray structure");
490 else if ((nzero > 0) || (nnotone > 0))
492 GMX_LOG(logger.warning)
494 .appendTextFormatted(
495 "there were %d atoms with zero occupancy and %d atoms with "
496 " occupancy unequal to one (out of %d atoms). Check your pdb "
498 nzero, nnotone, atoms->nr);
502 GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
507 void write_posres(const char* fn, t_atoms* pdba, real fc)
512 fp = gmx_fio_fopen(fn, "w");
514 "; In this topology include file, you will find position restraint\n"
515 "; entries for all the heavy atoms in your original pdb file.\n"
516 "; This means that all the protons which were added by pdb2gmx are\n"
517 "; not restrained.\n"
519 "[ position_restraints ]\n"
520 "; %4s%6s%8s%8s%8s\n",
521 "atom", "type", "fx", "fy", "fz");
522 for (i = 0; (i < pdba->nr); i++)
524 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
526 fprintf(fp, "%6d%6d %g %g %g\n", i + 1, 1, fc, fc, fc);
532 int read_pdball(const char* inf,
546 /* Read a pdb file. (containing proteins) */
548 int natom, new_natom, i;
551 printf("Reading %s...\n", inf);
552 readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
554 if (atoms->pdbinfo == nullptr)
556 snew(atoms->pdbinfo, atoms->nr);
558 if (fn2ftp(inf) == efPDB)
560 get_pdb_atomnumber(atoms, aps);
565 for (i = 0; i < atoms->nr; i++)
567 if (!is_hydrogen(*atoms->atomname[i]))
569 atoms->atom[new_natom] = atoms->atom[i];
570 atoms->atomname[new_natom] = atoms->atomname[i];
571 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
572 copy_rvec((*x)[i], (*x)[new_natom]);
576 atoms->nr = new_natom;
583 printf(" '%s',", *title);
585 printf(" %d atoms\n", natom);
587 /* Rename residues */
588 rename_pdbres(atoms, "HOH", watres, false, symtab);
589 rename_pdbres(atoms, "SOL", watres, false, symtab);
590 rename_pdbres(atoms, "WAT", watres, false, symtab);
592 rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose);
600 write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
606 void process_chain(t_atoms* pdba,
607 gmx::ArrayRef<gmx::RVec> x,
620 gmx::ArrayRef<const RtpRename> rr)
622 /* Rename aromatics, lys, asp and histidine */
625 rename_bb(pdba, "TYR", "TYRU", false, symtab);
629 rename_bb(pdba, "TRP", "TRPU", false, symtab);
633 rename_bb(pdba, "PHE", "PHEU", false, symtab);
637 rename_bbint(pdba, "LYS", get_lystp, false, symtab, rr);
641 rename_bbint(pdba, "ARG", get_argtp, false, symtab, rr);
645 rename_bbint(pdba, "GLN", get_glntp, false, symtab, rr);
649 rename_bbint(pdba, "ASP", get_asptp, false, symtab, rr);
653 rename_bb(pdba, "ASPH", "ASP", false, symtab);
657 rename_bbint(pdba, "GLU", get_glutp, false, symtab, rr);
661 rename_bb(pdba, "GLUH", "GLU", false, symtab);
666 set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
670 rename_bbint(pdba, "HIS", get_histp, true, symtab, rr);
673 /* Initialize the rtp builing block names with the residue names
674 * for the residues that have not been processed above.
676 pdbres_to_gmxrtp(pdba);
678 /* Now we have all rtp names set.
679 * The rtp names will conform to Gromacs naming,
680 * unless the input pdb file contained one or more force field specific
681 * rtp names as residue names.
685 /* struct for sorting the atoms from the pdb file */
688 int resnr; /* residue number */
689 int j; /* database order index */
690 int index; /* original atom number */
691 char anm1; /* second letter of atom name */
692 char altloc; /* alternate location indicator */
695 bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
697 int d = (a.resnr - b.resnr);
703 d = (a.anm1 - b.anm1);
706 d = (a.altloc - b.altloc);
713 void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
716 t_atoms** newPdbAtoms,
717 std::vector<gmx::RVec>* x,
721 t_atoms* pdba = *pdbaptr;
722 std::vector<gmx::RVec> xnew;
729 for (int i = 0; i < natoms; i++)
731 atomnm = *pdba->atomname[i];
732 const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
734 std::find_if(localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
735 [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
736 if (found == localPpResidue->atomname.end())
738 std::string buf = gmx::formatString(
739 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
740 "while sorting atoms.\n%s",
741 atomnm, *pdba->resinfo[pdba->atom[i].resind].name,
742 pdba->resinfo[pdba->atom[i].resind].nr, localPpResidue->resname.c_str(),
743 localPpResidue->natom(),
745 ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
746 "might have had a different number in the PDB file and was rebuilt\n"
747 "(it might for instance have been H3, and we only expected H1 & "
749 "Note that hydrogens might have been added to the entry for the "
751 "Remove this hydrogen or choose a different protonation state to "
753 "Option -ignh will ignore all hydrogens in the input."
755 gmx_fatal(FARGS, "%s", buf.c_str());
757 /* make shadow array to be sorted into indexgroup */
758 pdbi[i].resnr = pdba->atom[i].resind;
759 pdbi[i].j = std::distance(localPpResidue->atomname.begin(), found);
761 pdbi[i].anm1 = atomnm[1];
762 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
764 std::sort(pdbi, pdbi + natoms, pdbicomp);
766 /* pdba is sorted in pdbnew using the pdbi index */
767 std::vector<int> a(natoms);
768 srenew(*newPdbAtoms, 1);
769 init_t_atoms((*newPdbAtoms), natoms, true);
770 (*newPdbAtoms)->nr = pdba->nr;
771 (*newPdbAtoms)->nres = pdba->nres;
772 srenew((*newPdbAtoms)->resinfo, pdba->nres);
773 std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
774 for (int i = 0; i < natoms; i++)
776 (*newPdbAtoms)->atom[i] = pdba->atom[pdbi[i].index];
777 (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
778 (*newPdbAtoms)->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
779 xnew.push_back(x->at(pdbi[i].index));
780 /* make indexgroup in block */
781 a[i] = pdbi[i].index;
786 /* copy the sorted pdbnew back to pdba */
787 *pdbaptr = *newPdbAtoms;
789 add_grp(block, gnames, a, "prot_sort");
793 int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
795 int i, j, oldnatoms, ndel;
798 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
799 oldnatoms = pdba->nr;
801 /* NOTE: pdba->nr is modified inside the loop */
802 for (i = 1; (i < pdba->nr); i++)
804 /* compare 'i' and 'i-1', throw away 'i' if they are identical
805 this is a 'while' because multiple alternate locations can be present */
806 while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
807 && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
812 ri = &pdba->resinfo[pdba->atom[i].resind];
815 .appendTextFormatted("deleting duplicate atom %4s %s%4d%c",
816 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
817 if (ri->chainid && (ri->chainid != ' '))
819 printf(" ch %c", ri->chainid);
823 if (pdba->pdbinfo[i].atomnr)
825 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
827 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
829 printf(" altloc %c", pdba->pdbinfo[i].altloc);
835 /* We can not free, since it might be in the symtab */
836 /* sfree(pdba->atomname[i]); */
837 for (j = i; j < pdba->nr; j++)
839 pdba->atom[j] = pdba->atom[j + 1];
840 pdba->atomname[j] = pdba->atomname[j + 1];
843 pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
845 copy_rvec(x[j + 1], x[j]);
847 srenew(pdba->atom, pdba->nr);
848 /* srenew(pdba->atomname, pdba->nr); */
849 srenew(pdba->pdbinfo, pdba->nr);
852 if (pdba->nr != oldnatoms)
856 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
862 void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
864 std::string startResidueString =
865 gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
866 std::string endResidueString =
867 gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
869 // Check whether all residues in chain have the same chain ID.
870 bool allResiduesHaveSameChainID = true;
871 char chainID0 = pdba->resinfo[r0].chainid;
873 std::string residueString;
875 for (int i = r0 + 1; i < r1; i++)
877 chainID = pdba->resinfo[i].chainid;
878 if (chainID != chainID0)
880 allResiduesHaveSameChainID = false;
881 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
886 if (!allResiduesHaveSameChainID)
889 "The chain covering the range %s--%s does not have a consistent chain ID. "
890 "The first residue has ID '%c', while residue %s has ID '%c'.",
891 startResidueString.c_str(), endResidueString.c_str(), chainID0,
892 residueString.c_str(), chainID);
895 // At this point all residues have the same ID. If they are also non-blank
896 // we can be a bit more aggressive and require the types match too.
899 bool allResiduesHaveSameType = true;
901 std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
903 for (int i = r0 + 1; i < r1; i++)
905 restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
906 if (!gmx::equalCaseInsensitive(restype, restype0))
908 allResiduesHaveSameType = false;
909 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
914 if (!allResiduesHaveSameType)
917 "The residues in the chain %s--%s do not have a consistent type. "
918 "The first residue has type '%s', while residue %s is of type '%s'. "
919 "Either there is a mistake in your chain, or it includes nonstandard "
920 "residue names that have not yet been added to the residuetypes.dat "
921 "file in the GROMACS library directory. If there are other molecules "
922 "such as ligands, they should not have the same chain ID as the "
923 "adjacent protein chain since it's a separate molecule.",
924 startResidueString.c_str(), endResidueString.c_str(), restype0.c_str(),
925 residueString.c_str(), restype.c_str());
930 void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
933 std::optional<std::string> startrestype;
938 int startWarnings = 0;
942 // Check that all residues have the same chain identifier, and if it is
943 // non-blank we also require the residue types to match.
944 checkResidueTypeSanity(pdba, r0, r1, rt);
946 // If we return correctly from checkResidueTypeSanity(), the only
947 // remaining cases where we can have non-matching residue types is if
948 // the chain ID was blank, which could be the case e.g. for a structure
949 // read from a GRO file or other file types without chain information.
950 // In that case we need to be a bit more liberal and detect chains based
951 // on the residue type.
953 // If we get here, the chain ID must be identical for all residues
954 char chainID = pdba->resinfo[r0].chainid;
956 /* Find the starting terminus (typially N or 5') */
957 for (i = r0; i < r1 && *r_start == -1; i++)
959 startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
964 if (gmx::equalCaseInsensitive(*startrestype, "Protein")
965 || gmx::equalCaseInsensitive(*startrestype, "DNA")
966 || gmx::equalCaseInsensitive(*startrestype, "RNA"))
970 .appendTextFormatted("Identified residue %s%d as a starting terminus.",
971 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
974 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
980 .appendTextFormatted(
981 "Residue %s%d has type 'Ion', assuming it is not linked into a "
983 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
989 .appendTextFormatted("Disabling further notes about ions.");
995 // Either no known residue type, or one not needing special handling
996 if (startWarnings < 5)
1000 GMX_LOG(logger.warning)
1002 .appendTextFormatted(
1003 "Starting residue %s%d in chain not identified as "
1005 "This chain lacks identifiers, which makes it impossible to do "
1007 "classification of the start/end residues. Here we need to "
1008 "guess this residue "
1009 "should not be part of the chain and instead introduce a "
1010 "break, but that will "
1011 "be catastrophic if they should in fact be linked. Please "
1012 "check your structure, "
1013 "and add %s to residuetypes.dat if this was not correct.",
1014 *pdba->resinfo[i].name, pdba->resinfo[i].nr, *pdba->resinfo[i].name);
1018 GMX_LOG(logger.warning)
1020 .appendTextFormatted(
1021 "No residues in chain starting at %s%d identified as "
1023 "This makes it impossible to link them into a molecule, which "
1025 "correct or a catastrophic error. Please check your structure, "
1027 "necessary residue names to residuetypes.dat if this was not "
1029 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1032 if (startWarnings == 4)
1034 GMX_LOG(logger.warning)
1036 .appendTextFormatted(
1037 "Disabling further warnings about unidentified residues at start "
1046 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1047 for (int i = *r_start; i < r1; i++)
1049 std::optional<std::string> restype =
1050 rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1055 if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
1059 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1063 GMX_LOG(logger.info)
1065 .appendTextFormatted(
1066 "Residue %s%d has type 'Ion', assuming it is not linked into a "
1068 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1072 GMX_LOG(logger.info)
1074 .appendTextFormatted("Disabling further notes about ions.");
1080 // Either no known residue type, or one not needing special handling.
1081 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1082 // Otherwise the call to checkResidueTypeSanity() will
1083 // have caught the problem.
1084 if (endWarnings < 5)
1086 GMX_LOG(logger.warning)
1088 .appendTextFormatted(
1089 "Residue %s%d in chain has different type ('%s') from "
1090 "residue %s%d ('%s'). This chain lacks identifiers, which "
1092 "it impossible to do strict classification of the start/end "
1093 "residues. Here we "
1094 "need to guess this residue should not be part of the chain "
1096 "introduce a break, but that will be catastrophic if they "
1097 "should in fact be "
1098 "linked. Please check your structure, and add %s to "
1100 "if this was not correct.",
1101 *pdba->resinfo[i].name, pdba->resinfo[i].nr, restype->c_str(),
1102 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr,
1103 startrestype->c_str(), *pdba->resinfo[i].name);
1105 if (endWarnings == 4)
1107 GMX_LOG(logger.warning)
1109 .appendTextFormatted(
1110 "Disabling further warnings about unidentified residues at end "
1120 GMX_LOG(logger.info)
1122 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1123 *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
1127 enum class ChainSeparationType : int
1136 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1137 { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1139 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1140 { "Splitting chemical chains based on TER records or chain id changing.\n",
1141 "Splitting chemical chains based on TER records and chain id changing.\n",
1142 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1143 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1144 "Splitting chemical chains interactively.\n" }
1147 void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1150 char old_prev_chainid;
1151 char old_this_chainid;
1152 int old_prev_chainnum;
1153 int old_this_chainnum;
1155 char select[STRLEN];
1159 const char* prev_atomname;
1160 const char* this_atomname;
1161 const char* prev_resname;
1162 const char* this_resname;
1168 /* The default chain enumeration is based on TER records only */
1169 printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1171 old_prev_chainid = '?';
1172 old_prev_chainnum = -1;
1175 this_atomname = nullptr;
1177 this_resname = nullptr;
1181 for (i = 0; i < pdba->nres; i++)
1183 ri = &pdba->resinfo[i];
1184 old_this_chainid = ri->chainid;
1185 old_this_chainnum = ri->chainnum;
1187 prev_atomname = this_atomname;
1188 prev_atomnum = this_atomnum;
1189 prev_resname = this_resname;
1190 prev_resnum = this_resnum;
1191 prev_chainid = this_chainid;
1193 this_atomname = *(pdba->atomname[i]);
1194 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1195 this_resname = *ri->name;
1196 this_resnum = ri->nr;
1197 this_chainid = ri->chainid;
1199 switch (chainSeparation)
1201 case ChainSeparationType::IdOrTer:
1202 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1208 case ChainSeparationType::IdAndTer:
1209 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1215 case ChainSeparationType::Id:
1216 if (old_this_chainid != old_prev_chainid)
1222 case ChainSeparationType::Ter:
1223 if (old_this_chainnum != old_prev_chainnum)
1228 case ChainSeparationType::Interactive:
1229 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1233 GMX_LOG(logger.info)
1235 .appendTextFormatted(
1236 "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1238 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1239 prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1240 prev_atomname, this_resname, this_resnum, this_chainid,
1241 this_atomnum, this_atomname);
1243 if (nullptr == fgets(select, STRLEN - 1, stdin))
1245 gmx_fatal(FARGS, "Error reading from stdin");
1248 if (i == 0 || select[0] == 'y')
1254 case ChainSeparationType::Count:
1255 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1257 old_prev_chainid = old_this_chainid;
1258 old_prev_chainnum = old_this_chainnum;
1260 ri->chainnum = new_chainnum;
1264 bool checkChainCyclicity(t_atoms* pdba,
1268 gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1269 gmx::ArrayRef<const RtpRename> rr,
1270 real long_bond_dist_,
1271 real short_bond_dist_)
1273 int ai = -1, aj = -1;
1274 char* rtpname = *(pdba->resinfo[start_ter].rtp);
1275 std::string newName = search_resrename(rr, rtpname, false, false, false);
1276 if (newName.empty())
1280 auto res = getDatabaseEntry(newName, rtpFFDB);
1281 const char *name_ai, *name_aj;
1283 for (const auto& patch : res->rb[ebtsBONDS].b)
1284 { /* Search backward bond for n/5' terminus */
1285 name_ai = patch.ai().c_str();
1286 name_aj = patch.aj().c_str();
1287 if (name_ai[0] == '-')
1289 aj = search_res_atom(++name_ai, end_ter, pdba, "check", TRUE);
1290 ai = search_res_atom(name_aj, start_ter, pdba, "check", TRUE);
1292 else if (name_aj[0] == '-')
1294 aj = search_res_atom(++name_aj, end_ter, pdba, "check", TRUE);
1295 ai = search_res_atom(name_ai, start_ter, pdba, "check", TRUE);
1297 if (ai >= 0 && aj >= 0)
1303 if (!(ai >= 0 && aj >= 0))
1305 rtpname = *(pdba->resinfo[end_ter].rtp);
1306 newName = search_resrename(rr, rtpname, false, false, false);
1307 if (newName.empty())
1311 res = getDatabaseEntry(newName, rtpFFDB);
1312 for (const auto& patch : res->rb[ebtsBONDS].b)
1314 /* Seach forward bond for c/3' terminus */
1315 name_ai = patch.ai().c_str();
1316 name_aj = patch.aj().c_str();
1318 if (name_ai[0] == '+')
1320 ai = search_res_atom(name_aj, end_ter, pdba, "check", TRUE);
1321 aj = search_res_atom(++name_ai, start_ter, pdba, "check", TRUE);
1323 else if (name_aj[0] == '+')
1325 ai = search_res_atom(name_ai, end_ter, pdba, "check", TRUE);
1326 aj = search_res_atom(++name_aj, start_ter, pdba, "check", TRUE);
1328 if (ai >= 0 && aj >= 0)
1335 if (ai >= 0 && aj >= 0)
1337 real dist = distance2(pdbx[ai], pdbx[aj]);
1338 /* it is better to read bond length from ffbonded.itp */
1339 return (dist < gmx::square(long_bond_dist_) && dist > gmx::square(short_bond_dist_));
1350 char chainnum = ' ';
1353 bool bAllWat = false;
1355 std::vector<int> chainstart;
1362 bool bAllWat = false;
1364 std::vector<int> chainstart;
1365 std::vector<MoleculePatchDatabase*> ntdb;
1366 std::vector<MoleculePatchDatabase*> ctdb;
1367 std::vector<int> r_start;
1368 std::vector<int> r_end;
1370 std::vector<gmx::RVec> x;
1371 std::vector<int> cyclicBondsIndex;
1374 enum class VSitesType : int
1381 const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = { { "none", "hydrogens",
1384 enum class WaterType : int
1396 const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1397 { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1400 enum class MergeType : int
1407 const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = { { "no", "all",
1418 class pdb2gmx : public ICommandLineOptionsModule
1424 bVsiteAromatics_(FALSE),
1425 chainSeparation_(ChainSeparationType::IdOrTer),
1426 vsitesType_(VSitesType::None),
1427 waterType_(WaterType::Select),
1428 mergeType_(MergeType::No),
1432 gmx::LoggerBuilder builder;
1433 builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1434 builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1435 loggerOwner_ = std::make_unique<LoggerOwner>(builder.build());
1438 // From ICommandLineOptionsModule
1439 void init(CommandLineModuleSettings* /*settings*/) override {}
1441 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1443 void optionsFinished() override;
1461 bool bAllowMissing_;
1465 bool bChargeGroups_;
1475 bool bVsiteAromatics_;
1479 real long_bond_dist_;
1480 real short_bond_dist_;
1482 std::string indexOutputFile_;
1483 std::string outputFile_;
1484 std::string topologyFile_;
1485 std::string includeTopologyFile_;
1486 std::string outputConfFile_;
1487 std::string inputConfFile_;
1488 std::string outFile_;
1491 ChainSeparationType chainSeparation_;
1492 VSitesType vsitesType_;
1493 WaterType waterType_;
1494 MergeType mergeType_;
1497 char forcefield_[STRLEN];
1498 char ffdir_[STRLEN];
1501 std::vector<std::string> incls_;
1502 std::vector<t_mols> mols_;
1504 std::unique_ptr<LoggerOwner> loggerOwner_;
1507 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1509 const char* desc[] = {
1510 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1511 "some database files, adds hydrogens to the molecules and generates",
1512 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1513 "and a topology in GROMACS format.",
1514 "These files can subsequently be processed to generate a run input file.",
1516 "[THISMODULE] will search for force fields by looking for",
1517 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1518 "of the current working directory and of the GROMACS library directory",
1519 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1521 "By default the forcefield selection is interactive,",
1522 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1523 "in the list on the command line instead. In that case [THISMODULE] just looks",
1524 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1526 "After choosing a force field, all files will be read only from",
1527 "the corresponding force field directory.",
1528 "If you want to modify or add a residue types, you can copy the force",
1529 "field directory from the GROMACS library directory to your current",
1530 "working directory. If you want to add new protein residue types,",
1531 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1532 "or copy the whole library directory to a local directory and set",
1533 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1534 "Check Chapter 5 of the manual for more information about file formats.",
1537 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1538 "need not necessarily contain a protein structure. Every kind of",
1539 "molecule for which there is support in the database can be converted.",
1540 "If there is no support in the database, you can add it yourself.[PAR]",
1542 "The program has limited intelligence, it reads a number of database",
1543 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1544 "if necessary this can be done manually. The program can prompt the",
1545 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1546 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1547 "protonated (three protons, default), for Asp and Glu unprotonated",
1548 "(default) or protonated, for His the proton can be either on ND1,",
1549 "on NE2 or on both. By default these selections are done automatically.",
1550 "For His, this is based on an optimal hydrogen bonding",
1551 "conformation. Hydrogen bonds are defined based on a simple geometric",
1552 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1553 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1554 "[TT]-dist[tt] respectively.[PAR]",
1556 "The protonation state of N- and C-termini can be chosen interactively",
1557 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1558 "respectively. Some force fields support zwitterionic forms for chains of",
1559 "one residue, but for polypeptides these options should NOT be selected.",
1560 "The AMBER force fields have unique forms for the terminal residues,",
1561 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1562 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1563 "respectively to use these forms, making sure you preserve the format",
1564 "of the coordinate file. Alternatively, use named terminating residues",
1565 "(e.g. ACE, NME).[PAR]",
1567 "The separation of chains is not entirely trivial since the markup",
1568 "in user-generated PDB files frequently varies and sometimes it",
1569 "is desirable to merge entries across a TER record, for instance",
1570 "if you want a disulfide bridge or distance restraints between",
1571 "two protein chains or if you have a HEME group bound to a protein.",
1572 "In such cases multiple chains should be contained in a single",
1573 "[TT]moleculetype[tt] definition.",
1574 "To handle this, [THISMODULE] uses two separate options.",
1575 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1576 "start, and termini added when applicable. This can be done based on the",
1577 "existence of TER records, when the chain id changes, or combinations of either",
1578 "or both of these. You can also do the selection fully interactively.",
1579 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1580 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1581 "This can be turned off (no merging), all non-water chains can be merged into a",
1582 "single molecule, or the selection can be done interactively.[PAR]",
1584 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1585 "If any of the occupancies are not one, indicating that the atom is",
1586 "not resolved well in the structure, a warning message is issued.",
1587 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1588 "all occupancy fields may be zero. Either way, it is up to the user",
1589 "to verify the correctness of the input data (read the article!).[PAR]",
1591 "During processing the atoms will be reordered according to GROMACS",
1592 "conventions. With [TT]-n[tt] an index file can be generated that",
1593 "contains one group reordered in the same way. This allows you to",
1594 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1595 "one limitation: reordering is done after the hydrogens are stripped",
1596 "from the input and before new hydrogens are added. This means that",
1597 "you should not use [TT]-ignh[tt].[PAR]",
1599 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1600 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1601 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1604 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1605 "motions. Angular and out-of-plane motions can be removed by changing",
1606 "hydrogens into virtual sites and fixing angles, which fixes their",
1607 "position relative to neighboring atoms. Additionally, all atoms in the",
1608 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1609 "can be converted into virtual sites, eliminating the fast improper dihedral",
1610 "fluctuations in these rings (but this feature is deprecated).",
1611 "[BB]Note[bb] that in this case all other hydrogen",
1612 "atoms are also converted to virtual sites. The mass of all atoms that are",
1613 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1614 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1615 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1616 "done for water hydrogens to slow down the rotational motion of water.",
1617 "The increase in mass of the hydrogens is subtracted from the bonded",
1618 "(heavy) atom so that the total mass of the system remains the same.",
1620 "As a special case, ring-closed (or cyclic) molecules are considered.",
1621 "[THISMODULE] automatically determines if a cyclic molecule is present",
1622 "by evaluating the distance between the terminal atoms of a given chain.",
1623 "If this distance is greater than the [TT]-sb[tt]",
1624 "(\"Short bond warning distance\", default 0.05 nm)",
1625 "and less than the [TT]-lb[tt] (\"Long bond warning distance\", default 0.25 nm)",
1626 "the molecule is considered to be ring closed and will be processed as such.",
1627 "Please note that this does not detect cyclic bonds over periodic boundaries."
1630 settings->setHelpText(desc);
1632 options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1633 "Write the residue database in new format to [TT]new.rtp[tt]"));
1635 RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1637 RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1638 options->addOption(EnumOption<ChainSeparationType>("chainsep")
1639 .enumValue(c_chainSeparationTypeNames)
1640 .store(&chainSeparation_)
1641 .description("Condition in PDB files when a new chain should be "
1642 "started (adding termini)"));
1643 options->addOption(EnumOption<MergeType>("merge")
1644 .enumValue(c_mergeTypeNames)
1646 .description("Merge multiple chains into a single [moleculetype]"));
1647 options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1648 "Force field, interactive by default. Use [TT]-h[tt] for information."));
1650 EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1651 options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1652 "Set the next 8 options to interactive"));
1653 options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1654 "Interactive SS bridge selection"));
1655 options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1656 "Interactive termini selection, instead of charged (default)"));
1657 options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1658 "Interactive lysine selection, instead of charged"));
1659 options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1660 "Interactive arginine selection, instead of charged"));
1661 options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1662 "Interactive aspartic acid selection, instead of charged"));
1663 options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1664 "Interactive glutamic acid selection, instead of charged"));
1665 options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1666 "Interactive glutamine selection, instead of charged"));
1667 options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1668 "Interactive histidine selection, instead of checking H-bonds"));
1669 options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1670 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1672 RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1673 options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1674 "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1676 options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1677 "Sort the residues according to database, turning this off is dangerous as charge "
1678 "groups might be broken in parts"));
1680 BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1682 BooleanOption("missing")
1683 .store(&bAllowMissing_)
1684 .defaultValue(false)
1686 "Continue when atoms are missing and bonds cannot be made, dangerous"));
1688 BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1690 RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1691 options->addOption(EnumOption<VSitesType>("vsite")
1692 .store(&vsitesType_)
1693 .enumValue(c_vsitesTypeNames)
1694 .description("Convert atoms to virtual sites"));
1695 options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1696 "Make hydrogen atoms heavy"));
1698 BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1699 options->addOption(BooleanOption("chargegrp")
1700 .store(&bChargeGroups_)
1702 .description("Use charge groups in the [REF].rtp[ref] file"));
1703 options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1704 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1705 options->addOption(BooleanOption("renum")
1707 .defaultValue(false)
1708 .description("Renumber the residues consecutively in the output"));
1709 options->addOption(BooleanOption("rtpres")
1710 .store(&bRTPresname_)
1711 .defaultValue(false)
1712 .description("Use [REF].rtp[ref] entry names as residue names"));
1713 options->addOption(FileNameOption("f")
1716 .store(&inputConfFile_)
1718 .defaultBasename("protein")
1720 .description("Structure file"));
1721 options->addOption(FileNameOption("o")
1724 .store(&outputConfFile_)
1726 .defaultBasename("conf")
1727 .description("Structure file"));
1728 options->addOption(FileNameOption("p")
1731 .store(&topologyFile_)
1733 .defaultBasename("topol")
1734 .description("Topology file"));
1735 options->addOption(FileNameOption("i")
1738 .store(&includeTopologyFile_)
1740 .defaultBasename("posre")
1741 .description("Include file for topology"));
1742 options->addOption(FileNameOption("n")
1745 .store(&indexOutputFile_)
1746 .storeIsSet(&bIndexSet_)
1747 .defaultBasename("index")
1748 .description("Index file"));
1749 options->addOption(FileNameOption("q")
1753 .storeIsSet(&bOutputSet_)
1754 .defaultBasename("clean")
1756 .description("Structure file"));
1759 void pdb2gmx::optionsFinished()
1761 if (inputConfFile_.empty())
1763 GMX_THROW(InconsistentInputError("You must supply an input file"));
1767 /* if anything changes here, also change description of -inter */
1782 else if (bDeuterate_)
1791 /* Force field selection, interactive or direct */
1792 choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(), forcefield_,
1793 sizeof(forcefield_), ffdir_, sizeof(ffdir_), loggerOwner_->logger());
1795 if (strlen(forcefield_) > 0)
1797 ffname_ = forcefield_;
1798 ffname_[0] = std::toupper(ffname_[0]);
1802 gmx_fatal(FARGS, "Empty forcefield string");
1808 char select[STRLEN];
1809 std::vector<DisulfideBond> ssbonds;
1813 const char* prev_atomname;
1814 const char* this_atomname;
1815 const char* prev_resname;
1816 const char* this_resname;
1821 int prev_chainnumber;
1822 int this_chainnumber;
1823 int this_chainstart;
1824 int prev_chainstart;
1826 const gmx::MDLogger& logger = loggerOwner_->logger();
1828 GMX_LOG(logger.info)
1830 .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1832 choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
1834 switch (vsitesType_)
1836 case VSitesType::None:
1838 bVsiteAromatics_ = false;
1840 case VSitesType::Hydrogens:
1842 bVsiteAromatics_ = false;
1844 case VSitesType::Aromatics:
1846 bVsiteAromatics_ = true;
1848 case VSitesType::Count:
1849 gmx_fatal(FARGS, "Internal inconsistency: VSitesType is out of range.");
1852 /* Open the symbol table */
1854 open_symtab(&symtab);
1856 /* Residue type database */
1859 /* Read residue renaming database(s), if present */
1860 std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
1862 std::vector<RtpRename> rtprename;
1863 for (const auto& filename : rrn)
1865 GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
1866 FILE* fp = fflib_open(filename);
1867 read_rtprename(filename.c_str(), fp, &rtprename);
1871 /* Add all alternative names from the residue renaming database to the list
1872 of recognized amino/nucleic acids. */
1873 for (const auto& rename : rtprename)
1875 /* Only add names if the 'standard' gromacs/iupac base name was found */
1876 if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
1878 rt.addResidue(rename.main, *restype);
1879 rt.addResidue(rename.nter, *restype);
1880 rt.addResidue(rename.cter, *restype);
1881 rt.addResidue(rename.bter, *restype);
1888 if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
1892 else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
1902 char* title = nullptr;
1906 int natom = read_pdball(inputConfFile_.c_str(), bOutputSet_, outFile_.c_str(), &title, &pdba_all,
1907 &pdbx, &pbcType, box, bRemoveH_, &symtab, &rt, watres, &aps, bVerbose_);
1911 std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
1912 GMX_THROW(InconsistentInputError(message));
1915 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
1916 int nwaterchain = 0;
1918 modify_chain_numbers(&pdba_all, chainSeparation_, logger);
1920 int nchainmerges = 0;
1922 this_atomname = nullptr;
1924 this_resname = nullptr;
1927 this_chainnumber = -1;
1928 this_chainstart = 0;
1929 /* Keep the compiler happy */
1930 prev_chainstart = 0;
1933 std::vector<t_pdbchain> pdb_ch;
1936 bool bMerged = false;
1937 for (int i = 0; (i < natom); i++)
1939 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1941 /* TODO this should live in a helper object, and consolidate
1942 that with code in modify_chain_numbers */
1943 prev_atomname = this_atomname;
1944 prev_atomnum = this_atomnum;
1945 prev_resname = this_resname;
1946 prev_resnum = this_resnum;
1947 prev_chainid = this_chainid;
1948 prev_chainnumber = this_chainnumber;
1951 prev_chainstart = this_chainstart;
1954 this_atomname = *pdba_all.atomname[i];
1955 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
1956 this_resname = *ri->name;
1957 this_resnum = ri->nr;
1958 this_chainid = ri->chainid;
1959 this_chainnumber = ri->chainnum;
1961 bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
1963 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
1967 "Must have pdbinfo from reading a PDB file if chain number is changing");
1968 this_chainstart = pdba_all.atom[i].resind;
1970 if (i > 0 && !bWat_)
1972 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
1974 GMX_LOG(logger.info)
1976 .appendTextFormatted(
1977 "Merge chain ending with residue %s%d (chain id '%c', atom %d "
1978 "%s) and chain starting with "
1979 "residue %s%d (chain id '%c', atom %d %s) into a single "
1980 "moleculetype (keeping termini)? [n/y]",
1981 prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1982 prev_atomname, this_resname, this_resnum, this_chainid,
1983 this_atomnum, this_atomname);
1985 if (nullptr == fgets(select, STRLEN - 1, stdin))
1987 gmx_fatal(FARGS, "Error reading from stdin");
1989 bMerged = (select[0] == 'y');
1991 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
1999 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
2000 pdba_all.atom[i].resind - prev_chainstart;
2001 pdb_ch[numChains - 1].nterpairs++;
2002 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
2007 /* set natom for previous chain */
2010 pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
2017 /* check if chain identifier was used before */
2018 for (int j = 0; (j < numChains); j++)
2020 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
2022 GMX_LOG(logger.warning)
2024 .appendTextFormatted(
2025 "Chain identifier '%c' is used in two non-sequential "
2027 "They will be treated as separate chains unless you "
2028 "reorder your file.",
2032 t_pdbchain newChain;
2033 newChain.chainid = ri->chainid;
2034 newChain.chainnum = ri->chainnum;
2036 newChain.bAllWat = bWat_;
2039 newChain.nterpairs = 0;
2043 newChain.nterpairs = 1;
2045 newChain.chainstart.resize(newChain.nterpairs + 1);
2046 /* modified [numChains] to [0] below */
2047 newChain.chainstart[0] = 0;
2048 pdb_ch.push_back(newChain);
2054 pdb_ch.back().natom = natom - pdb_ch.back().start;
2056 /* set all the water blocks at the end of the chain */
2057 std::vector<int> swap_index(numChains);
2059 for (int i = 0; i < numChains; i++)
2061 if (!pdb_ch[i].bAllWat)
2067 for (int i = 0; i < numChains; i++)
2069 if (pdb_ch[i].bAllWat)
2075 if (nwaterchain > 1)
2077 GMX_LOG(logger.info)
2079 .appendTextFormatted("Moved all the water blocks to the end");
2083 std::vector<t_chain> chains(numChains);
2084 /* copy pdb data and x for all chains */
2085 for (int i = 0; (i < numChains); i++)
2087 int si = swap_index[i];
2088 chains[i].chainid = pdb_ch[si].chainid;
2089 chains[i].chainnum = pdb_ch[si].chainnum;
2090 chains[i].bAllWat = pdb_ch[si].bAllWat;
2091 chains[i].nterpairs = pdb_ch[si].nterpairs;
2092 chains[i].chainstart = pdb_ch[si].chainstart;
2093 chains[i].ntdb.clear();
2094 chains[i].ctdb.clear();
2095 chains[i].r_start.resize(pdb_ch[si].nterpairs);
2096 chains[i].r_end.resize(pdb_ch[si].nterpairs);
2098 snew(chains[i].pdba, 1);
2099 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2100 for (j = 0; j < chains[i].pdba->nr; j++)
2102 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start + j];
2103 chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2104 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2105 chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2107 /* Re-index the residues assuming that the indices are continuous */
2108 int k = chains[i].pdba->atom[0].resind;
2109 int nres = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2110 chains[i].pdba->nres = nres;
2111 for (int j = 0; j < chains[i].pdba->nr; j++)
2113 chains[i].pdba->atom[j].resind -= k;
2115 srenew(chains[i].pdba->resinfo, nres);
2116 for (int j = 0; j < nres; j++)
2118 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k + j];
2119 chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2120 /* make all chain identifiers equal to that of the chain */
2121 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2125 if (nchainmerges > 0)
2127 GMX_LOG(logger.info)
2129 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2133 GMX_LOG(logger.info)
2135 .appendTextFormatted(
2136 "There are %d chains and %d blocks of water and "
2137 "%d residues with %d atoms",
2138 numChains - nwaterchain, nwaterchain, pdba_all.nres, natom);
2140 GMX_LOG(logger.info)
2142 .appendTextFormatted(" %5s %4s %6s", "chain", "#res", "#atoms");
2143 for (int i = 0; (i < numChains); i++)
2145 GMX_LOG(logger.info)
2147 .appendTextFormatted(" %d '%c' %5d %6d %s\n", i + 1,
2148 chains[i].chainid ? chains[i].chainid : '-', chains[i].pdba->nres,
2149 chains[i].pdba->nr, chains[i].bAllWat ? "(only water)" : "");
2152 check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2154 /* Read atomtypes... */
2155 PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2157 /* read residue database */
2158 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2159 std::vector<std::string> rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2160 std::vector<PreprocessResidue> rtpFFDB;
2161 for (const auto& filename : rtpf)
2163 readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2167 /* Not correct with multiple rtp input files with different bonded types */
2168 FILE* fp = gmx_fio_fopen("new.rtp", "w");
2169 print_resall(fp, rtpFFDB, atype);
2173 /* read hydrogen database */
2174 std::vector<MoleculePatchDatabase> ah;
2175 read_h_db(ffdir_, &ah);
2177 /* Read Termini database... */
2178 std::vector<MoleculePatchDatabase> ntdb;
2179 std::vector<MoleculePatchDatabase> ctdb;
2180 std::vector<MoleculePatchDatabase*> tdblist;
2181 int nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2182 int nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2184 FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2186 print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2189 std::vector<gmx::RVec> x;
2190 /* new pdb datastructure for sorting. */
2191 t_atoms** sortAtoms = nullptr;
2192 t_atoms** localAtoms = nullptr;
2193 snew(sortAtoms, numChains);
2194 snew(localAtoms, numChains);
2195 for (int chain = 0; (chain < numChains); chain++)
2197 cc = &(chains[chain]);
2199 /* set pdba, natom and nres to the current chain */
2202 natom = cc->pdba->nr;
2203 int nres = cc->pdba->nres;
2205 if (cc->chainid && (cc->chainid != ' '))
2207 GMX_LOG(logger.info)
2209 .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2210 chain + 1, cc->chainid, natom, nres);
2214 GMX_LOG(logger.info)
2216 .appendTextFormatted("Processing chain %d (%d atoms, %d residues)", chain + 1,
2220 process_chain(pdba, x, bUnA_, bUnA_, bUnA_, bLysMan_, bAspMan_, bGluMan_, bHisMan_,
2221 bArgMan_, bGlnMan_, angle_, distance_, &symtab, rtprename);
2223 cc->chainstart[cc->nterpairs] = pdba->nres;
2225 for (int i = 0; i < cc->nterpairs; i++)
2227 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]),
2228 &(cc->r_end[j]), &rt, logger);
2229 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2231 if (checkChainCyclicity(pdba, pdbx, cc->r_start[j], cc->r_end[j], rtpFFDB,
2232 rtprename, long_bond_dist_, short_bond_dist_))
2234 cc->cyclicBondsIndex.push_back(cc->r_start[j]);
2235 cc->cyclicBondsIndex.push_back(cc->r_end[j]);
2236 cc->r_start[j] = -1;
2246 if (cc->nterpairs == 0 && cc->cyclicBondsIndex.empty())
2248 GMX_LOG(logger.info)
2250 .appendTextFormatted(
2251 "Problem with chain definition, or missing terminal residues. "
2252 "This chain does not appear to contain a recognized chain molecule. "
2253 "If this is incorrect, you can edit residuetypes.dat to modify the "
2257 /* Check for disulfides and other special bonds */
2258 ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2260 if (!rtprename.empty())
2262 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2265 for (int i = 0; i < cc->nterpairs; i++)
2268 * We first apply a filter so we only have the
2269 * termini that can be applied to the residue in question
2270 * (or a generic terminus if no-residue specific is available).
2272 /* First the N terminus */
2275 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2276 if (tdblist.empty())
2278 GMX_LOG(logger.info)
2280 .appendTextFormatted(
2281 "No suitable end (N or 5') terminus found in database - "
2282 "assuming this residue "
2283 "is already in a terminus-specific form and skipping terminus "
2285 cc->ntdb.push_back(nullptr);
2289 if (bTerMan_ && !tdblist.empty())
2291 sprintf(select, "Select start terminus type for %s-%d",
2292 *pdba->resinfo[cc->r_start[i]].name, pdba->resinfo[cc->r_start[i]].nr);
2293 cc->ntdb.push_back(choose_ter(tdblist, select));
2297 cc->ntdb.push_back(tdblist[0]);
2300 printf("Start terminus %s-%d: %s\n", *pdba->resinfo[cc->r_start[i]].name,
2301 pdba->resinfo[cc->r_start[i]].nr, (cc->ntdb[i])->name.c_str());
2307 cc->ntdb.push_back(nullptr);
2310 /* And the C terminus */
2313 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2314 if (tdblist.empty())
2316 GMX_LOG(logger.info)
2318 .appendTextFormatted(
2319 "No suitable end (C or 3') terminus found in database - "
2320 "assuming this residue"
2321 "is already in a terminus-specific form and skipping terminus "
2323 cc->ctdb.push_back(nullptr);
2327 if (bTerMan_ && !tdblist.empty())
2329 sprintf(select, "Select end terminus type for %s-%d",
2330 *pdba->resinfo[cc->r_end[i]].name, pdba->resinfo[cc->r_end[i]].nr);
2331 cc->ctdb.push_back(choose_ter(tdblist, select));
2335 cc->ctdb.push_back(tdblist[0]);
2337 printf("End terminus %s-%d: %s\n", *pdba->resinfo[cc->r_end[i]].name,
2338 pdba->resinfo[cc->r_end[i]].nr, (cc->ctdb[i])->name.c_str());
2344 cc->ctdb.push_back(nullptr);
2347 std::vector<MoleculePatchDatabase> hb_chain;
2348 /* lookup hackblocks and rtp for all residues */
2349 std::vector<PreprocessResidue> restp_chain;
2350 get_hackblocks_rtp(&hb_chain, &restp_chain, rtpFFDB, pdba->nres, pdba->resinfo, cc->nterpairs,
2351 &symtab, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing_, logger);
2352 /* ideally, now we would not need the rtp itself anymore, but do
2353 everything using the hb and restp arrays. Unfortunately, that
2354 requires some re-thinking of code in gen_vsite.c, which I won't
2355 do now :( AF 26-7-99 */
2357 rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2359 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2364 t_blocka* block = new_blocka();
2366 sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2367 remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2372 GMX_LOG(logger.warning)
2374 .appendTextFormatted(
2375 "With the -remh option the generated "
2376 "index file (%s) might be useless "
2377 "(the index file is generated before hydrogens are added)",
2378 indexOutputFile_.c_str());
2380 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2382 for (int i = 0; i < block->nr; i++)
2392 GMX_LOG(logger.warning)
2394 .appendTextFormatted(
2395 "Without sorting no check for duplicate atoms can be done");
2398 /* Generate Hydrogen atoms (and termini) in the sequence */
2399 GMX_LOG(logger.info)
2401 .appendTextFormatted(
2402 "Generating any missing hydrogen atoms and/or adding termini.");
2403 add_h(&pdba, &localAtoms[chain], &x, ah, &symtab, cc->nterpairs, cc->ntdb, cc->ctdb,
2404 cc->r_start, cc->r_end, bAllowMissing_, cc->cyclicBondsIndex);
2405 GMX_LOG(logger.info)
2407 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2409 /* make up molecule name(s) */
2411 int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2413 std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2415 std::string molname;
2423 this_chainid = cc->chainid;
2425 /* Add the chain id if we have one */
2426 if (this_chainid != ' ')
2428 suffix.append(formatString("_chain_%c", this_chainid));
2431 /* Check if there have been previous chains with the same id */
2433 for (int k = 0; k < chain; k++)
2435 if (cc->chainid == chains[k].chainid)
2440 /* Add the number for this chain identifier if there are multiple copies */
2443 suffix.append(formatString("%d", nid_used + 1));
2446 if (suffix.length() > 0)
2448 molname.append(restype);
2449 molname.append(suffix);
2456 std::string itp_fn = topologyFile_;
2458 std::string posre_fn = includeTopologyFile_;
2459 if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2462 printf("Chain time...\n");
2463 // construct the itp file name
2464 itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2466 itp_fn.append(molname);
2467 itp_fn.append(".itp");
2468 // now do the same for posre
2469 posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2470 posre_fn.append("_");
2471 posre_fn.append(molname);
2472 posre_fn.append(".itp");
2473 if (posre_fn == itp_fn)
2475 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2477 incls_.emplace_back();
2478 incls_.back() = itp_fn;
2479 itp_file_ = gmx_fio_fopen(itp_fn.c_str(), "w");
2486 mols_.emplace_back();
2489 mols_.back().name = "SOL";
2490 mols_.back().nr = pdba->nres;
2494 mols_.back().name = molname;
2495 mols_.back().nr = 1;
2500 print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2506 top_file2 = nullptr;
2510 top_file2 = itp_file_;
2514 top_file2 = top_file;
2517 pdb2top(top_file2, posre_fn.c_str(), molname.c_str(), pdba, &x, &atype, &symtab, rtpFFDB,
2518 restp_chain, hb_chain, bAllowMissing_, bVsites_, bVsiteAromatics_, ffdir_, mHmult_,
2519 ssbonds, long_bond_dist_, short_bond_dist_, bDeuterate_, bChargeGroups_, bCmap_,
2520 bRenumRes_, bRTPresname_, cc->cyclicBondsIndex, logger);
2524 write_posres(posre_fn.c_str(), pdba, posre_fc_);
2529 gmx_fio_fclose(itp_file_);
2532 /* pdba and natom have been reassigned somewhere so: */
2537 if (watermodel_ == nullptr)
2539 for (int chain = 0; chain < numChains; chain++)
2541 if (chains[chain].bAllWat)
2543 auto message = formatString(
2544 "You have chosen not to include a water model, "
2545 "but there is water in the input file. Select a "
2546 "water model or remove the water from your input file.");
2547 GMX_THROW(InconsistentInputError(message));
2553 std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2554 if (!fflib_fexist(waterFile))
2556 auto message = formatString(
2557 "The topology file '%s' for the selected water "
2558 "model '%s' can not be found in the force field "
2559 "directory. Select a different water model.",
2560 waterFile.c_str(), watermodel_);
2561 GMX_THROW(InconsistentInputError(message));
2565 print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2566 gmx_fio_fclose(top_file);
2568 /* now merge all chains back together */
2571 for (int i = 0; (i < numChains); i++)
2573 natom += chains[i].pdba->nr;
2574 nres += chains[i].pdba->nres;
2578 init_t_atoms(atoms, natom, false);
2579 for (int i = 0; i < atoms->nres; i++)
2581 sfree(atoms->resinfo[i].name);
2584 srenew(atoms->resinfo, nres);
2588 for (int i = 0; (i < numChains); i++)
2592 GMX_LOG(logger.info)
2594 .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2595 i + 1, chains[i].pdba->nr, chains[i].pdba->nres);
2597 for (int j = 0; (j < chains[i].pdba->nr); j++)
2599 atoms->atom[k] = chains[i].pdba->atom[j];
2600 atoms->atom[k].resind += l; /* l is processed nr of residues */
2601 atoms->atomname[k] = chains[i].pdba->atomname[j];
2602 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2603 x.push_back(chains[i].x[j]);
2606 for (int j = 0; (j < chains[i].pdba->nres); j++)
2608 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2611 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2615 done_atom(chains[i].pdba);
2620 GMX_LOG(logger.info)
2622 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2623 print_sums(atoms, true, logger);
2627 GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2628 clear_rvec(box_space);
2631 make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2633 write_sto_conf(outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr,
2636 done_symtab(&symtab);
2637 done_atom(&pdba_all);
2639 for (int chain = 0; chain < numChains; chain++)
2641 sfree(sortAtoms[chain]);
2642 sfree(localAtoms[chain]);
2650 GMX_LOG(logger.info)
2652 .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2653 GMX_LOG(logger.info)
2655 .appendTextFormatted("You have successfully generated a topology from: %s.",
2656 inputConfFile_.c_str());
2657 if (watermodel_ != nullptr)
2659 GMX_LOG(logger.info)
2661 .appendTextFormatted("The %s force field and the %s water model are used.", ffname_,
2667 GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2669 GMX_LOG(logger.info)
2671 .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2678 const char pdb2gmxInfo::name[] = "pdb2gmx";
2679 const char pdb2gmxInfo::shortDescription[] =
2680 "Convert coordinate files to topology and FF-compliant coordinate files";
2681 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2683 return std::make_unique<pdb2gmx>();