2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/commandline/pargs.h"
49 #include "gromacs/fileio/confio.h"
50 #include "gromacs/fileio/gmxfio.h"
51 #include "gromacs/fileio/pdbio.h"
52 #include "gromacs/gmxlib/conformation-utilities.h"
53 #include "gromacs/gmxpreprocess/fflibutil.h"
54 #include "gromacs/gmxpreprocess/genhydro.h"
55 #include "gromacs/gmxpreprocess/h_db.h"
56 #include "gromacs/gmxpreprocess/hizzie.h"
57 #include "gromacs/gmxpreprocess/pdb2top.h"
58 #include "gromacs/gmxpreprocess/pgutil.h"
59 #include "gromacs/gmxpreprocess/resall.h"
60 #include "gromacs/gmxpreprocess/specbond.h"
61 #include "gromacs/gmxpreprocess/ter_db.h"
62 #include "gromacs/gmxpreprocess/toputil.h"
63 #include "gromacs/gmxpreprocess/xlate.h"
64 #include "gromacs/math/vec.h"
65 #include "gromacs/topology/atomprop.h"
66 #include "gromacs/topology/block.h"
67 #include "gromacs/topology/index.h"
68 #include "gromacs/topology/residuetypes.h"
69 #include "gromacs/topology/symtab.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/utility/arraysize.h"
72 #include "gromacs/utility/cstringutil.h"
73 #include "gromacs/utility/dir_separator.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/gmxassert.h"
76 #include "gromacs/utility/smalloc.h"
77 #include "gromacs/utility/strdb.h"
78 #include "gromacs/utility/stringutil.h"
82 char gmx[RTP_MAXCHAR+2];
83 char main[RTP_MAXCHAR+2];
84 char nter[RTP_MAXCHAR+2];
85 char cter[RTP_MAXCHAR+2];
86 char bter[RTP_MAXCHAR+2];
90 static const char *res2bb_notermini(const char *name,
91 int nrr, const rtprename_t *rr)
93 /* NOTE: This function returns the main building block name,
94 * it does not take terminal renaming into account.
99 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
104 return (i < nrr ? rr[i].main : name);
107 static const char *select_res(int nr, int resnr,
108 const char *name[], const char *expl[],
110 int nrr, const rtprename_t *rr)
114 printf("Which %s type do you want for residue %d\n", title, resnr+1);
115 for (sel = 0; (sel < nr); sel++)
117 printf("%d. %s (%s)\n",
118 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
120 printf("\nType a number:"); fflush(stdout);
122 if (scanf("%d", &sel) != 1)
124 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
130 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
135 const char *lh[easpNR] = { "ASP", "ASPH" };
136 const char *expl[easpNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
144 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
149 const char *lh[egluNR] = { "GLU", "GLUH" };
150 const char *expl[egluNR] = {
151 "Not protonated (charge -1)",
152 "Protonated (charge 0)"
155 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
158 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
163 const char *lh[eglnNR] = { "GLN", "QLN" };
164 const char *expl[eglnNR] = {
165 "Not protonated (charge 0)",
166 "Protonated (charge +1)"
169 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
172 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
177 const char *lh[elysNR] = { "LYSN", "LYS" };
178 const char *expl[elysNR] = {
179 "Not protonated (charge 0)",
180 "Protonated (charge +1)"
183 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
186 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
191 const char *lh[eargNR] = { "ARGN", "ARG" };
192 const char *expl[eargNR] = {
193 "Not protonated (charge 0)",
194 "Protonated (charge +1)"
197 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
200 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
202 const char *expl[ehisNR] = {
209 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
212 static void read_rtprename(const char *fname, FILE *fp,
213 int *nrtprename, rtprename_t **rtprename)
215 char line[STRLEN], buf[STRLEN];
224 while (get_a_line(fp, line, STRLEN))
227 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
228 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
229 * Note that the buffer length has been increased to 7 to allow this,
230 * so we just need to make sure the strings have zero-length initially.
233 rr[n].main[0] = '\0';
234 rr[n].nter[0] = '\0';
235 rr[n].cter[0] = '\0';
236 rr[n].bter[0] = '\0';
237 nc = sscanf(line, "%6s %6s %6s %6s %6s %s",
238 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
239 if (strlen(rr[n].gmx) > RTP_MAXCHAR || strlen(rr[n].main) > RTP_MAXCHAR ||
240 strlen(rr[n].nter) > RTP_MAXCHAR || strlen(rr[n].cter) > RTP_MAXCHAR || strlen(rr[n].bter) > RTP_MAXCHAR)
242 gmx_fatal(FARGS, "Residue renaming database '%s' has strings longer than %d chars in first 5 columns:\n%s",
243 fname, RTP_MAXCHAR, line);
248 if (nc != 2 && nc != 5)
250 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
256 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
261 /* This file does not have special termini names, copy them from main */
262 strcpy(rr[n].nter, rr[n].main);
263 strcpy(rr[n].cter, rr[n].main);
264 strcpy(rr[n].bter, rr[n].main);
274 static char *search_resrename(int nrr, rtprename_t *rr,
276 bool bStart, bool bEnd,
277 bool bCompareFFRTPname)
285 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
286 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
291 /* If found in the database, rename this residue's rtp building block,
292 * otherwise keep the old name.
315 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
323 static void rename_resrtp(t_atoms *pdba, int nterpairs, const int *r_start, const int *r_end,
324 int nrr, rtprename_t *rr, t_symtab *symtab,
332 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
334 for (r = 0; r < pdba->nres; r++)
338 for (j = 0; j < nterpairs; j++)
345 for (j = 0; j < nterpairs; j++)
353 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
355 if (bFFRTPTERRNM && nn == nullptr && (bStart || bEnd))
357 /* This is a terminal residue, but the residue name,
358 * currently stored in .rtp, is not a standard residue name,
359 * but probably a force field specific rtp name.
360 * Check if we need to rename it because it is terminal.
362 nn = search_resrename(nrr, rr,
363 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
366 if (nn != nullptr && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
370 printf("Changing rtp entry of residue %d %s to '%s'\n",
371 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
373 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
378 static void pdbres_to_gmxrtp(t_atoms *pdba)
382 for (i = 0; (i < pdba->nres); i++)
384 if (pdba->resinfo[i].rtp == nullptr)
386 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
391 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
392 bool bFullCompare, t_symtab *symtab)
397 for (i = 0; (i < pdba->nres); i++)
399 resnm = *pdba->resinfo[i].name;
400 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
401 (!bFullCompare && strstr(resnm, oldnm) != nullptr))
403 /* Rename the residue name (not the rtp name) */
404 pdba->resinfo[i].name = put_symtab(symtab, newnm);
409 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
410 bool bFullCompare, t_symtab *symtab)
415 for (i = 0; (i < pdba->nres); i++)
417 /* We have not set the rtp name yes, use the residue name */
418 bbnm = *pdba->resinfo[i].name;
419 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
420 (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
422 /* Change the rtp builing block name */
423 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
428 static void rename_bbint(t_atoms *pdba, const char *oldnm,
429 const char *gettp(int, int, const rtprename_t *),
432 int nrr, const rtprename_t *rr)
438 for (i = 0; i < pdba->nres; i++)
440 /* We have not set the rtp name yes, use the residue name */
441 bbnm = *pdba->resinfo[i].name;
442 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
443 (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
445 ptr = gettp(i, nrr, rr);
446 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
451 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
457 ftp = fn2ftp(filename);
458 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
460 fprintf(stderr, "No occupancies in %s\n", filename);
464 for (i = 0; (i < atoms->nr); i++)
466 if (atoms->pdbinfo[i].occup != 1)
470 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
471 *atoms->resinfo[atoms->atom[i].resind].name,
472 atoms->resinfo[ atoms->atom[i].resind].nr,
474 atoms->pdbinfo[i].occup);
476 if (atoms->pdbinfo[i].occup == 0)
486 if (nzero == atoms->nr)
488 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
490 else if ((nzero > 0) || (nnotone > 0))
494 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
495 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
497 nzero, nnotone, atoms->nr);
501 fprintf(stderr, "All occupancies are one\n");
506 static void write_posres(char *fn, t_atoms *pdba, real fc)
511 fp = gmx_fio_fopen(fn, "w");
513 "; In this topology include file, you will find position restraint\n"
514 "; entries for all the heavy atoms in your original pdb file.\n"
515 "; This means that all the protons which were added by pdb2gmx are\n"
516 "; not restrained.\n"
518 "[ position_restraints ]\n"
519 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
521 for (i = 0; (i < pdba->nr); i++)
523 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
525 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
531 static int read_pdball(const char *inf, const char *outf, char *title,
532 t_atoms *atoms, rvec **x,
533 int *ePBC, matrix box, gmx_bool bRemoveH,
534 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
535 gmx_atomprop_t aps, gmx_bool bVerbose)
536 /* Read a pdb file. (containing proteins) */
538 int natom, new_natom, i;
541 printf("Reading %s...\n", inf);
544 read_tps_conf(inf, top, ePBC, x, nullptr, box, FALSE);
545 strncpy(title, *top->name, STRLEN);
546 title[STRLEN-1] = '\0';
550 if (atoms->pdbinfo == nullptr)
552 snew(atoms->pdbinfo, atoms->nr);
554 if (fn2ftp(inf) == efPDB)
556 get_pdb_atomnumber(atoms, aps);
561 for (i = 0; i < atoms->nr; i++)
563 if (!is_hydrogen(*atoms->atomname[i]))
565 atoms->atom[new_natom] = atoms->atom[i];
566 atoms->atomname[new_natom] = atoms->atomname[i];
567 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
568 copy_rvec((*x)[i], (*x)[new_natom]);
572 atoms->nr = new_natom;
579 printf(" '%s',", title);
581 printf(" %d atoms\n", natom);
583 /* Rename residues */
584 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
585 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
586 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
588 rename_atoms("xlateat.dat", nullptr,
589 atoms, symtab, nullptr, TRUE, rt, TRUE, bVerbose);
598 write_sto_conf(outf, title, atoms, *x, nullptr, *ePBC, box);
604 static void process_chain(t_atoms *pdba, rvec *x,
605 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
606 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
607 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
608 real angle, real distance, t_symtab *symtab,
609 int nrr, const rtprename_t *rr)
611 /* Rename aromatics, lys, asp and histidine */
614 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
618 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
622 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
626 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
630 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
634 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
638 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
642 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
646 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
650 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
655 set_histp(pdba, x, angle, distance);
659 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
662 /* Initialize the rtp builing block names with the residue names
663 * for the residues that have not been processed above.
665 pdbres_to_gmxrtp(pdba);
667 /* Now we have all rtp names set.
668 * The rtp names will conform to Gromacs naming,
669 * unless the input pdb file contained one or more force field specific
670 * rtp names as residue names.
674 /* struct for sorting the atoms from the pdb file */
676 int resnr; /* residue number */
677 int j; /* database order index */
678 int index; /* original atom number */
679 char anm1; /* second letter of atom name */
680 char altloc; /* alternate location indicator */
683 static bool pdbicomp(const t_pdbindex &a, const t_pdbindex &b)
685 int d = (a.resnr - b.resnr);
691 d = (a.anm1 - b.anm1);
694 d = (a.altloc - b.altloc);
701 static void sort_pdbatoms(t_restp restp[],
702 int natoms, t_atoms **pdbaptr, rvec **x,
703 t_blocka *block, char ***gnames)
705 t_atoms *pdba, *pdbnew;
719 for (i = 0; i < natoms; i++)
721 atomnm = *pdba->atomname[i];
722 rptr = &restp[pdba->atom[i].resind];
723 for (j = 0; (j < rptr->natom); j++)
725 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
730 if (j == rptr->natom)
735 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
736 "while sorting atoms.\n%s", atomnm,
737 *pdba->resinfo[pdba->atom[i].resind].name,
738 pdba->resinfo[pdba->atom[i].resind].nr,
741 is_hydrogen(atomnm) ?
742 "\nFor a hydrogen, this can be a different protonation state, or it\n"
743 "might have had a different number in the PDB file and was rebuilt\n"
744 "(it might for instance have been H3, and we only expected H1 & H2).\n"
745 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
746 "Remove this hydrogen or choose a different protonation state to solve it.\n"
747 "Option -ignh will ignore all hydrogens in the input." : ".");
748 gmx_fatal(FARGS, "%s", buf);
750 /* make shadow array to be sorted into indexgroup */
751 pdbi[i].resnr = pdba->atom[i].resind;
754 pdbi[i].anm1 = atomnm[1];
755 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
757 std::sort(pdbi, pdbi+natoms, pdbicomp);
759 /* pdba is sorted in pdbnew using the pdbi index */
762 init_t_atoms(pdbnew, natoms, TRUE);
764 pdbnew->nr = pdba->nr;
765 pdbnew->nres = pdba->nres;
766 sfree(pdbnew->resinfo);
767 pdbnew->resinfo = pdba->resinfo;
768 for (i = 0; i < natoms; i++)
770 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
771 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
772 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
773 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
774 /* make indexgroup in block */
775 a[i] = pdbi[i].index;
778 sfree(pdba->atomname);
780 sfree(pdba->pdbinfo);
783 /* copy the sorted pdbnew back to pdba */
786 add_grp(block, gnames, natoms, a, "prot_sort");
792 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
794 int i, j, oldnatoms, ndel;
797 printf("Checking for duplicate atoms....\n");
798 oldnatoms = pdba->nr;
800 /* NOTE: pdba->nr is modified inside the loop */
801 for (i = 1; (i < pdba->nr); i++)
803 /* compare 'i' and 'i-1', throw away 'i' if they are identical
804 this is a 'while' because multiple alternate locations can be present */
805 while ( (i < pdba->nr) &&
806 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
807 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
812 ri = &pdba->resinfo[pdba->atom[i].resind];
813 printf("deleting duplicate atom %4s %s%4d%c",
814 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
815 if (ri->chainid && (ri->chainid != ' '))
817 printf(" ch %c", ri->chainid);
821 if (pdba->pdbinfo[i].atomnr)
823 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
825 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
827 printf(" altloc %c", pdba->pdbinfo[i].altloc);
833 /* We can not free, since it might be in the symtab */
834 /* sfree(pdba->atomname[i]); */
835 for (j = i; j < pdba->nr; j++)
837 pdba->atom[j] = pdba->atom[j+1];
838 pdba->atomname[j] = pdba->atomname[j+1];
841 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
843 copy_rvec(x[j+1], x[j]);
845 srenew(pdba->atom, pdba->nr);
846 /* srenew(pdba->atomname, pdba->nr); */
847 srenew(pdba->pdbinfo, pdba->nr);
850 if (pdba->nr != oldnatoms)
852 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
859 checkResidueTypeSanity(t_atoms * pdba,
862 gmx_residuetype_t * rt)
864 std::string startResidueString = gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
865 std::string endResidueString = gmx::formatString("%s%d", *pdba->resinfo[r1-1].name, pdba->resinfo[r1-1].nr);
867 // Check whether all residues in chain have the same chain ID.
868 bool allResiduesHaveSameChainID = true;
869 char chainID0 = pdba->resinfo[r0].chainid;
871 std::string residueString;
873 for (int i = r0 + 1; i < r1; i++)
875 chainID = pdba->resinfo[i].chainid;
876 if (chainID != chainID0)
878 allResiduesHaveSameChainID = false;
879 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
884 if (!allResiduesHaveSameChainID)
887 "The chain covering the range %s--%s does not have a consistent chain ID. "
888 "The first residue has ID '%c', while residue %s has ID '%c'.",
889 startResidueString.c_str(), endResidueString.c_str(),
890 chainID0, residueString.c_str(), chainID);
893 // At this point all residues have the same ID. If they are also non-blank
894 // we can be a bit more aggressive and require the types match too.
897 bool allResiduesHaveSameType = true;
898 const char *restype0;
900 gmx_residuetype_get_type(rt, *pdba->resinfo[r0].name, &restype0);
902 for (int i = r0 + 1; i < r1; i++)
904 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &restype);
905 if (gmx_strcasecmp(restype, restype0))
907 allResiduesHaveSameType = false;
908 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
913 if (!allResiduesHaveSameType)
916 "The residues in the chain %s--%s do not have a consistent type. "
917 "The first residue has type '%s', while residue %s is of type '%s'. "
918 "Either there is a mistake in your chain, or it includes nonstandard "
919 "residue names that have not yet been added to the residuetypes.dat "
920 "file in the GROMACS library directory. If there are other molecules "
921 "such as ligands, they should not have the same chain ID as the "
922 "adjacent protein chain since it's a separate molecule.",
923 startResidueString.c_str(), endResidueString.c_str(),
924 restype0, residueString.c_str(), restype);
929 static void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
930 gmx_residuetype_t *rt)
933 const char *p_startrestype;
934 const char *p_restype;
939 int startWarnings = 0;
943 // Check that all residues have the same chain identifier, and if it is
944 // non-blank we also require the residue types to match.
945 checkResidueTypeSanity(pdba, r0, r1, rt);
947 // If we return correctly from checkResidueTypeSanity(), the only
948 // remaining cases where we can have non-matching residue types is if
949 // the chain ID was blank, which could be the case e.g. for a structure
950 // read from a GRO file or other file types without chain information.
951 // In that case we need to be a bit more liberal and detect chains based
952 // on the residue type.
954 // If we get here, the chain ID must be identical for all residues
955 char chainID = pdba->resinfo[r0].chainid;
957 /* Find the starting terminus (typially N or 5') */
958 for (i = r0; i < r1 && *r_start == -1; i++)
960 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
961 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
963 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
966 else if (!gmx_strcasecmp(p_startrestype, "Ion"))
970 printf("Residue %s%d has type 'Ion', assuming it is not linked into a chain.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
974 printf("Disabling further notes about ions.\n");
980 if (startWarnings < 5)
984 printf("\nWarning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n"
985 "This chain lacks identifiers, which makes it impossible to do strict\n"
986 "classification of the start/end residues. Here we need to guess this residue\n"
987 "should not be part of the chain and instead introduce a break, but that will\n"
988 "be catastrophic if they should in fact be linked. Please check your structure,\n"
989 "and add %s to residuetypes.dat if this was not correct.\n\n",
990 *pdba->resinfo[i].name, pdba->resinfo[i].nr, *pdba->resinfo[i].name);
994 printf("\nWarning: No residues in chain starting at %s%d identified as Protein/RNA/DNA.\n"
995 "This makes it impossible to link them into a molecule, which could either be\n"
996 "correct or a catastrophic error. Please check your structure, and add all\n"
997 "necessary residue names to residuetypes.dat if this was not correct.\n\n",
998 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1001 if (startWarnings == 4)
1003 printf("Disabling further warnings about unidentified residues at start of chain.\n");
1011 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1012 for (i = *r_start; i < r1; i++)
1014 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
1015 if (!gmx_strcasecmp(p_restype, p_startrestype) && endWarnings == 0)
1019 else if (!gmx_strcasecmp(p_startrestype, "Ion"))
1023 printf("Residue %s%d has type 'Ion', assuming it is not linked into a chain.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1027 printf("Disabling further notes about ions.\n");
1033 // This can only trigger if the chain ID is blank - otherwise the
1034 // call to checkResidueTypeSanity() will have caught the problem.
1035 if (endWarnings < 5)
1037 printf("\nWarning: Residue %s%d in chain has different type ('%s') from\n"
1038 "residue %s%d ('%s'). This chain lacks identifiers, which makes\n"
1039 "it impossible to do strict classification of the start/end residues. Here we\n"
1040 "need to guess this residue should not be part of the chain and instead\n"
1041 "introduce a break, but that will be catastrophic if they should in fact be\n"
1042 "linked. Please check your structure, and add %s to residuetypes.dat\n"
1043 "if this was not correct.\n\n",
1044 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
1045 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype, *pdba->resinfo[i].name);
1047 if (endWarnings == 4)
1049 printf("Disabling further warnings about unidentified residues at end of chain.\n");
1058 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
1072 static SplittingType getSplittingType(const char *chainsep)
1074 SplittingType splitting = SPLIT_TER_ONLY; /* keep compiler happy */
1076 /* Be a bit flexible to catch typos */
1077 if (!strncmp(chainsep, "id_o", 4))
1079 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
1080 splitting = SPLIT_ID_OR_TER;
1081 printf("Splitting chemical chains based on TER records or chain id changing.\n");
1083 else if (!strncmp(chainsep, "int", 3))
1085 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
1086 splitting = SPLIT_INTERACTIVE;
1087 printf("Splitting chemical chains interactively.\n");
1089 else if (!strncmp(chainsep, "id_a", 4))
1091 splitting = SPLIT_ID_AND_TER;
1092 printf("Splitting chemical chains based on TER records and chain id changing.\n");
1094 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
1096 splitting = SPLIT_ID_ONLY;
1097 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
1099 else if (chainsep[0] == 't')
1101 splitting = SPLIT_TER_ONLY;
1102 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
1106 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
1112 modify_chain_numbers(t_atoms * pdba,
1113 const char * chainsep)
1116 char old_prev_chainid;
1117 char old_this_chainid;
1118 int old_prev_chainnum;
1119 int old_this_chainnum;
1121 char select[STRLEN];
1125 const char * prev_atomname;
1126 const char * this_atomname;
1127 const char * prev_resname;
1128 const char * this_resname;
1134 SplittingType splitting = getSplittingType(chainsep);
1136 /* The default chain enumeration is id_or_ter */
1138 old_prev_chainid = '?';
1139 old_prev_chainnum = -1;
1142 this_atomname = nullptr;
1144 this_resname = nullptr;
1148 for (i = 0; i < pdba->nres; i++)
1150 ri = &pdba->resinfo[i];
1151 old_this_chainid = ri->chainid;
1152 old_this_chainnum = ri->chainnum;
1154 prev_atomname = this_atomname;
1155 prev_atomnum = this_atomnum;
1156 prev_resname = this_resname;
1157 prev_resnum = this_resnum;
1158 prev_chainid = this_chainid;
1160 this_atomname = *(pdba->atomname[i]);
1161 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i+1;
1162 this_resname = *ri->name;
1163 this_resnum = ri->nr;
1164 this_chainid = ri->chainid;
1168 case SPLIT_ID_OR_TER:
1169 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1175 case SPLIT_ID_AND_TER:
1176 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1183 if (old_this_chainid != old_prev_chainid)
1189 case SPLIT_TER_ONLY:
1190 if (old_this_chainnum != old_prev_chainnum)
1195 case SPLIT_INTERACTIVE:
1196 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1200 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1201 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1202 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1203 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1205 if (nullptr == fgets(select, STRLEN-1, stdin))
1207 gmx_fatal(FARGS, "Error reading from stdin");
1210 if (i == 0 || select[0] == 'y')
1217 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1219 old_prev_chainid = old_this_chainid;
1220 old_prev_chainnum = old_this_chainnum;
1222 ri->chainnum = new_chainnum;
1251 int gmx_pdb2gmx(int argc, char *argv[])
1253 const char *desc[] = {
1254 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1255 "some database files, adds hydrogens to the molecules and generates",
1256 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1257 "and a topology in GROMACS format.",
1258 "These files can subsequently be processed to generate a run input file.",
1260 "[THISMODULE] will search for force fields by looking for",
1261 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1262 "of the current working directory and of the GROMACS library directory",
1263 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1265 "By default the forcefield selection is interactive,",
1266 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1267 "in the list on the command line instead. In that case [THISMODULE] just looks",
1268 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1270 "After choosing a force field, all files will be read only from",
1271 "the corresponding force field directory.",
1272 "If you want to modify or add a residue types, you can copy the force",
1273 "field directory from the GROMACS library directory to your current",
1274 "working directory. If you want to add new protein residue types,",
1275 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1276 "or copy the whole library directory to a local directory and set",
1277 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1278 "Check Chapter 5 of the manual for more information about file formats.",
1281 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1282 "need not necessarily contain a protein structure. Every kind of",
1283 "molecule for which there is support in the database can be converted.",
1284 "If there is no support in the database, you can add it yourself.[PAR]",
1286 "The program has limited intelligence, it reads a number of database",
1287 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1288 "if necessary this can be done manually. The program can prompt the",
1289 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1290 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1291 "protonated (three protons, default), for Asp and Glu unprotonated",
1292 "(default) or protonated, for His the proton can be either on ND1,",
1293 "on NE2 or on both. By default these selections are done automatically.",
1294 "For His, this is based on an optimal hydrogen bonding",
1295 "conformation. Hydrogen bonds are defined based on a simple geometric",
1296 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1297 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1298 "[TT]-dist[tt] respectively.[PAR]",
1300 "The protonation state of N- and C-termini can be chosen interactively",
1301 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1302 "respectively. Some force fields support zwitterionic forms for chains of",
1303 "one residue, but for polypeptides these options should NOT be selected.",
1304 "The AMBER force fields have unique forms for the terminal residues,",
1305 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1306 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1307 "respectively to use these forms, making sure you preserve the format",
1308 "of the coordinate file. Alternatively, use named terminating residues",
1309 "(e.g. ACE, NME).[PAR]",
1311 "The separation of chains is not entirely trivial since the markup",
1312 "in user-generated PDB files frequently varies and sometimes it",
1313 "is desirable to merge entries across a TER record, for instance",
1314 "if you want a disulfide bridge or distance restraints between",
1315 "two protein chains or if you have a HEME group bound to a protein.",
1316 "In such cases multiple chains should be contained in a single",
1317 "[TT]moleculetype[tt] definition.",
1318 "To handle this, [THISMODULE] uses two separate options.",
1319 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1320 "start, and termini added when applicable. This can be done based on the",
1321 "existence of TER records, when the chain id changes, or combinations of either",
1322 "or both of these. You can also do the selection fully interactively.",
1323 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1324 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1325 "This can be turned off (no merging), all non-water chains can be merged into a",
1326 "single molecule, or the selection can be done interactively.[PAR]",
1328 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1329 "If any of the occupancies are not one, indicating that the atom is",
1330 "not resolved well in the structure, a warning message is issued.",
1331 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1332 "all occupancy fields may be zero. Either way, it is up to the user",
1333 "to verify the correctness of the input data (read the article!).[PAR]",
1335 "During processing the atoms will be reordered according to GROMACS",
1336 "conventions. With [TT]-n[tt] an index file can be generated that",
1337 "contains one group reordered in the same way. This allows you to",
1338 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1339 "one limitation: reordering is done after the hydrogens are stripped",
1340 "from the input and before new hydrogens are added. This means that",
1341 "you should not use [TT]-ignh[tt].[PAR]",
1343 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1344 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1345 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1348 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1349 "motions. Angular and out-of-plane motions can be removed by changing",
1350 "hydrogens into virtual sites and fixing angles, which fixes their",
1351 "position relative to neighboring atoms. Additionally, all atoms in the",
1352 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1353 "can be converted into virtual sites, eliminating the fast improper dihedral",
1354 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1355 "atoms are also converted to virtual sites. The mass of all atoms that are",
1356 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1357 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1358 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1359 "done for water hydrogens to slow down the rotational motion of water.",
1360 "The increase in mass of the hydrogens is subtracted from the bonded",
1361 "(heavy) atom so that the total mass of the system remains the same."
1365 FILE *fp, *top_file, *top_file2, *itp_file = nullptr;
1367 t_atoms pdba_all, *pdba;
1371 int chain, nch, maxch, nwaterchain;
1373 t_chain *chains, *cc;
1374 char select[STRLEN];
1382 int i, j, k, l, nrtp;
1383 int *swap_index, si;
1387 gpp_atomtype_t atype;
1388 gmx_residuetype_t*rt;
1390 char itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1391 char molname[STRLEN], title[STRLEN];
1392 char *c, forcefield[STRLEN], ffdir[STRLEN];
1393 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1401 rtprename_t *rtprename = nullptr;
1402 int nah, nNtdb, nCtdb, ntdblist;
1403 t_hackblock *ntdb, *ctdb, **tdblist;
1407 bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE;
1409 t_hackblock *hb_chain;
1410 t_restp *restp_chain;
1411 gmx_output_env_t *oenv;
1412 const char *p_restype;
1416 const char * prev_atomname;
1417 const char * this_atomname;
1418 const char * prev_resname;
1419 const char * this_resname;
1424 int prev_chainnumber;
1425 int this_chainnumber;
1427 int this_chainstart;
1428 int prev_chainstart;
1435 { efSTX, "-f", "eiwit.pdb", ffREAD },
1436 { efSTO, "-o", "conf", ffWRITE },
1437 { efTOP, nullptr, nullptr, ffWRITE },
1438 { efITP, "-i", "posre", ffWRITE },
1439 { efNDX, "-n", "clean", ffOPTWR },
1440 { efSTO, "-q", "clean.pdb", ffOPTWR }
1442 #define NFILE asize(fnm)
1444 gmx_bool bNewRTP = FALSE;
1445 gmx_bool bInter = FALSE, bCysMan = FALSE;
1446 gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1447 gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1448 gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH = FALSE;
1449 gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1450 gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1451 gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1452 real angle = 135.0, distance = 0.3, posre_fc = 1000;
1453 real long_bond_dist = 0.25, short_bond_dist = 0.05;
1454 const char *vsitestr[] = { nullptr, "none", "hydrogens", "aromatics", nullptr };
1455 const char *watstr[] = { nullptr, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p", nullptr };
1456 const char *chainsep[] = { nullptr, "id_or_ter", "id_and_ter", "ter", "id", "interactive", nullptr };
1457 const char *merge[] = {nullptr, "no", "all", "interactive", nullptr };
1458 const char *ff = "select";
1461 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1462 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1463 { "-lb", FALSE, etREAL, {&long_bond_dist},
1464 "HIDDENLong bond warning distance" },
1465 { "-sb", FALSE, etREAL, {&short_bond_dist},
1466 "HIDDENShort bond warning distance" },
1467 { "-chainsep", FALSE, etENUM, {chainsep},
1468 "Condition in PDB files when a new chain should be started (adding termini)" },
1469 { "-merge", FALSE, etENUM, {&merge},
1470 "Merge multiple chains into a single [moleculetype]" },
1471 { "-ff", FALSE, etSTR, {&ff},
1472 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1473 { "-water", FALSE, etENUM, {watstr},
1474 "Water model to use" },
1475 { "-inter", FALSE, etBOOL, {&bInter},
1476 "Set the next 8 options to interactive"},
1477 { "-ss", FALSE, etBOOL, {&bCysMan},
1478 "Interactive SS bridge selection" },
1479 { "-ter", FALSE, etBOOL, {&bTerMan},
1480 "Interactive termini selection, instead of charged (default)" },
1481 { "-lys", FALSE, etBOOL, {&bLysMan},
1482 "Interactive lysine selection, instead of charged" },
1483 { "-arg", FALSE, etBOOL, {&bArgMan},
1484 "Interactive arginine selection, instead of charged" },
1485 { "-asp", FALSE, etBOOL, {&bAspMan},
1486 "Interactive aspartic acid selection, instead of charged" },
1487 { "-glu", FALSE, etBOOL, {&bGluMan},
1488 "Interactive glutamic acid selection, instead of charged" },
1489 { "-gln", FALSE, etBOOL, {&bGlnMan},
1490 "Interactive glutamine selection, instead of neutral" },
1491 { "-his", FALSE, etBOOL, {&bHisMan},
1492 "Interactive histidine selection, instead of checking H-bonds" },
1493 { "-angle", FALSE, etREAL, {&angle},
1494 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1495 { "-dist", FALSE, etREAL, {&distance},
1496 "Maximum donor-acceptor distance for a H-bond (nm)" },
1497 { "-una", FALSE, etBOOL, {&bUnA},
1498 "Select aromatic rings with united CH atoms on phenylalanine, "
1499 "tryptophane and tyrosine" },
1500 { "-sort", FALSE, etBOOL, {&bSort},
1501 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1502 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1503 "Ignore hydrogen atoms that are in the coordinate file" },
1504 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1505 "Continue when atoms are missing and bonds cannot be made, dangerous" },
1506 { "-v", FALSE, etBOOL, {&bVerbose},
1507 "Be slightly more verbose in messages" },
1508 { "-posrefc", FALSE, etREAL, {&posre_fc},
1509 "Force constant for position restraints" },
1510 { "-vsite", FALSE, etENUM, {vsitestr},
1511 "Convert atoms to virtual sites" },
1512 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1513 "Make hydrogen atoms heavy" },
1514 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1515 "Change the mass of hydrogens to 2 amu" },
1516 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1517 "Use charge groups in the [REF].rtp[ref] file" },
1518 { "-cmap", TRUE, etBOOL, {&bCmap},
1519 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)" },
1520 { "-renum", TRUE, etBOOL, {&bRenumRes},
1521 "Renumber the residues consecutively in the output" },
1522 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1523 "Use [REF].rtp[ref] entry names as residue names" }
1525 #define NPARGS asize(pa)
1527 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1533 /* Force field selection, interactive or direct */
1534 choose_ff(strcmp(ff, "select") == 0 ? nullptr : ff,
1535 forcefield, sizeof(forcefield),
1536 ffdir, sizeof(ffdir));
1538 if (strlen(forcefield) > 0)
1540 strcpy(ffname, forcefield);
1541 ffname[0] = toupper(ffname[0]);
1545 gmx_fatal(FARGS, "Empty forcefield string");
1548 printf("\nUsing the %s force field in directory %s\n\n",
1551 choose_watermodel(watstr[0], ffdir, &watermodel);
1555 /* if anything changes here, also change description of -inter */
1570 else if (bDeuterate)
1579 /* parse_common_args ensures vsitestr has been selected, but
1580 clang-static-analyzer needs clues to know that */
1581 GMX_ASSERT(vsitestr[0], "-vsite default wasn't processed correctly");
1582 switch (vsitestr[0][0])
1584 case 'n': /* none */
1586 bVsiteAromatics = FALSE;
1588 case 'h': /* hydrogens */
1590 bVsiteAromatics = FALSE;
1592 case 'a': /* aromatics */
1594 bVsiteAromatics = TRUE;
1597 gmx_fatal(FARGS, "DEATH HORROR in $s (%s): vsitestr[0]='%d'",
1598 __FILE__, __LINE__, vsitestr[0]);
1601 /* Open the symbol table */
1602 open_symtab(&symtab);
1604 /* Residue type database */
1605 gmx_residuetype_init(&rt);
1607 /* Read residue renaming database(s), if present */
1608 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1611 rtprename = nullptr;
1612 for (i = 0; i < nrrn; i++)
1614 fp = fflib_open(rrn[i]);
1615 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1621 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1622 for (i = 0; i < nrtprename; i++)
1624 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1626 /* Only add names if the 'standard' gromacs/iupac base name was found */
1629 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1630 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1631 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1632 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1637 if (watermodel != nullptr && (strstr(watermodel, "4p") ||
1638 strstr(watermodel, "4P")))
1642 else if (watermodel != nullptr && (strstr(watermodel, "5p") ||
1643 strstr(watermodel, "5P")))
1652 aps = gmx_atomprop_init();
1653 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1654 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1659 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1662 printf("Analyzing pdb file\n");
1665 modify_chain_numbers(&pdba_all, chainsep[0]);
1669 this_atomname = nullptr;
1671 this_resname = nullptr;
1674 this_chainnumber = -1;
1675 this_chainstart = 0;
1676 /* Keep the compiler happy */
1677 prev_chainstart = 0;
1681 snew(pdb_ch, maxch);
1684 for (i = 0; (i < natom); i++)
1686 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1688 /* TODO this should live in a helper object, and consolidate
1689 that with code in modify_chain_numbers */
1690 prev_atomname = this_atomname;
1691 prev_atomnum = this_atomnum;
1692 prev_resname = this_resname;
1693 prev_resnum = this_resnum;
1694 prev_chainid = this_chainid;
1695 prev_chainnumber = this_chainnumber;
1698 prev_chainstart = this_chainstart;
1701 this_atomname = *pdba_all.atomname[i];
1702 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i+1;
1703 this_resname = *ri->name;
1704 this_resnum = ri->nr;
1705 this_chainid = ri->chainid;
1706 this_chainnumber = ri->chainnum;
1708 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1710 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1712 this_chainstart = pdba_all.atom[i].resind;
1716 if (!strncmp(merge[0], "int", 3))
1718 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1719 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1720 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1721 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1723 if (nullptr == fgets(select, STRLEN-1, stdin))
1725 gmx_fatal(FARGS, "Error reading from stdin");
1727 bMerged = (select[0] == 'y');
1729 else if (!strncmp(merge[0], "all", 3))
1737 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1738 pdba_all.atom[i].resind - prev_chainstart;
1739 pdb_ch[nch-1].nterpairs++;
1740 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1745 /* set natom for previous chain */
1748 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1755 /* check if chain identifier was used before */
1756 for (j = 0; (j < nch); j++)
1758 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1760 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1761 "They will be treated as separate chains unless you reorder your file.\n",
1765 // TODO This is too convoluted. Use a std::vector
1769 srenew(pdb_ch, maxch);
1771 pdb_ch[nch].chainid = ri->chainid;
1772 pdb_ch[nch].chainnum = ri->chainnum;
1773 pdb_ch[nch].start = i;
1774 pdb_ch[nch].bAllWat = bWat;
1777 pdb_ch[nch].nterpairs = 0;
1781 pdb_ch[nch].nterpairs = 1;
1783 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1784 /* modified [nch] to [0] below */
1785 pdb_ch[nch].chainstart[0] = 0;
1791 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1793 /* set all the water blocks at the end of the chain */
1794 snew(swap_index, nch);
1796 for (i = 0; i < nch; i++)
1798 if (!pdb_ch[i].bAllWat)
1804 for (i = 0; i < nch; i++)
1806 if (pdb_ch[i].bAllWat)
1812 if (nwaterchain > 1)
1814 printf("Moved all the water blocks to the end\n");
1818 /* copy pdb data and x for all chains */
1819 for (i = 0; (i < nch); i++)
1822 chains[i].chainid = pdb_ch[si].chainid;
1823 chains[i].chainnum = pdb_ch[si].chainnum;
1824 chains[i].bAllWat = pdb_ch[si].bAllWat;
1825 chains[i].nterpairs = pdb_ch[si].nterpairs;
1826 chains[i].chainstart = pdb_ch[si].chainstart;
1827 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1828 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1829 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1830 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1832 snew(chains[i].pdba, 1);
1833 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1834 snew(chains[i].x, chains[i].pdba->nr);
1835 for (j = 0; j < chains[i].pdba->nr; j++)
1837 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1838 snew(chains[i].pdba->atomname[j], 1);
1839 *chains[i].pdba->atomname[j] =
1840 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1841 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1842 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1844 /* Re-index the residues assuming that the indices are continuous */
1845 k = chains[i].pdba->atom[0].resind;
1846 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1847 chains[i].pdba->nres = nres;
1848 for (j = 0; j < chains[i].pdba->nr; j++)
1850 chains[i].pdba->atom[j].resind -= k;
1852 srenew(chains[i].pdba->resinfo, nres);
1853 for (j = 0; j < nres; j++)
1855 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1856 snew(chains[i].pdba->resinfo[j].name, 1);
1857 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1858 /* make all chain identifiers equal to that of the chain */
1859 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1863 if (nchainmerges > 0)
1865 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1869 printf("There are %d chains and %d blocks of water and "
1870 "%d residues with %d atoms\n",
1871 nch-nwaterchain, nwaterchain,
1872 pdba_all.nres, natom);
1874 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1875 for (i = 0; (i < nch); i++)
1877 printf(" %d '%c' %5d %6d %s\n",
1878 i+1, chains[i].chainid ? chains[i].chainid : '-',
1879 chains[i].pdba->nres, chains[i].pdba->nr,
1880 chains[i].bAllWat ? "(only water)" : "");
1884 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1886 /* Read atomtypes... */
1887 atype = read_atype(ffdir, &symtab);
1889 /* read residue database */
1890 printf("Reading residue database... (%s)\n", forcefield);
1891 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1894 for (i = 0; i < nrtpf; i++)
1896 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1902 /* Not correct with multiple rtp input files with different bonded types */
1903 fp = gmx_fio_fopen("new.rtp", "w");
1904 print_resall(fp, nrtp, restp, atype);
1908 /* read hydrogen database */
1909 nah = read_h_db(ffdir, &ah);
1911 /* Read Termini database... */
1912 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1913 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1915 top_fn = ftp2fn(efTOP, NFILE, fnm);
1916 top_file = gmx_fio_fopen(top_fn, "w");
1918 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1924 for (chain = 0; (chain < nch); chain++)
1926 cc = &(chains[chain]);
1928 /* set pdba, natom and nres to the current chain */
1931 natom = cc->pdba->nr;
1932 nres = cc->pdba->nres;
1934 if (cc->chainid && ( cc->chainid != ' ' ) )
1936 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1937 chain+1, cc->chainid, natom, nres);
1941 printf("Processing chain %d (%d atoms, %d residues)\n",
1942 chain+1, natom, nres);
1945 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1946 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1947 nrtprename, rtprename);
1949 cc->chainstart[cc->nterpairs] = pdba->nres;
1951 for (i = 0; i < cc->nterpairs; i++)
1953 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1954 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1956 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1962 if (cc->nterpairs == 0)
1964 printf("Problem with chain definition, or missing terminal residues.\n"
1965 "This chain does not appear to contain a recognized chain molecule.\n"
1966 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1969 /* Check for disulfides and other special bonds */
1970 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1974 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1978 for (i = 0; i < cc->nterpairs; i++)
1982 * We first apply a filter so we only have the
1983 * termini that can be applied to the residue in question
1984 * (or a generic terminus if no-residue specific is available).
1986 /* First the N terminus */
1989 tdblist = filter_ter(nNtdb, ntdb,
1990 *pdba->resinfo[cc->r_start[i]].name,
1994 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1995 "is already in a terminus-specific form and skipping terminus selection.\n");
1996 cc->ntdb[i] = nullptr;
2000 if (bTerMan && ntdblist > 1)
2002 sprintf(select, "Select start terminus type for %s-%d",
2003 *pdba->resinfo[cc->r_start[i]].name,
2004 pdba->resinfo[cc->r_start[i]].nr);
2005 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
2009 cc->ntdb[i] = tdblist[0];
2012 printf("Start terminus %s-%d: %s\n",
2013 *pdba->resinfo[cc->r_start[i]].name,
2014 pdba->resinfo[cc->r_start[i]].nr,
2015 (cc->ntdb[i])->name);
2021 cc->ntdb[i] = nullptr;
2024 /* And the C terminus */
2027 tdblist = filter_ter(nCtdb, ctdb,
2028 *pdba->resinfo[cc->r_end[i]].name,
2032 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
2033 "is already in a terminus-specific form and skipping terminus selection.\n");
2034 cc->ctdb[i] = nullptr;
2038 if (bTerMan && ntdblist > 1)
2040 sprintf(select, "Select end terminus type for %s-%d",
2041 *pdba->resinfo[cc->r_end[i]].name,
2042 pdba->resinfo[cc->r_end[i]].nr);
2043 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
2047 cc->ctdb[i] = tdblist[0];
2049 printf("End terminus %s-%d: %s\n",
2050 *pdba->resinfo[cc->r_end[i]].name,
2051 pdba->resinfo[cc->r_end[i]].nr,
2052 (cc->ctdb[i])->name);
2058 cc->ctdb[i] = nullptr;
2061 /* lookup hackblocks and rtp for all residues */
2062 get_hackblocks_rtp(&hb_chain, &restp_chain,
2063 nrtp, restp, pdba->nres, pdba->resinfo,
2064 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end,
2066 /* ideally, now we would not need the rtp itself anymore, but do
2067 everything using the hb and restp arrays. Unfortunately, that
2068 requires some re-thinking of code in gen_vsite.c, which I won't
2069 do now :( AF 26-7-99 */
2071 rename_atoms(nullptr, ffdir,
2072 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
2074 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
2078 block = new_blocka();
2080 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
2081 remove_duplicate_atoms(pdba, x, bVerbose);
2082 if (ftp2bSet(efNDX, NFILE, fnm))
2086 fprintf(stderr, "WARNING: with the -remh option the generated "
2087 "index file (%s) might be useless\n"
2088 "(the index file is generated before hydrogens are added)",
2089 ftp2fn(efNDX, NFILE, fnm));
2091 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
2093 for (i = 0; i < block->nr; i++)
2102 fprintf(stderr, "WARNING: "
2103 "without sorting no check for duplicate atoms can be done\n");
2106 /* Generate Hydrogen atoms (and termini) in the sequence */
2107 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
2108 add_h(&pdba, &x, nah, ah,
2109 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
2110 nullptr, nullptr, TRUE, FALSE);
2111 printf("Now there are %d residues with %d atoms\n",
2112 pdba->nres, pdba->nr);
2114 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
2116 /* make up molecule name(s) */
2118 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2120 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2126 sprintf(molname, "Water");
2130 this_chainid = cc->chainid;
2132 /* Add the chain id if we have one */
2133 if (this_chainid != ' ')
2135 sprintf(buf, "_chain_%c", this_chainid);
2136 strcat(suffix, buf);
2139 /* Check if there have been previous chains with the same id */
2141 for (k = 0; k < chain; k++)
2143 if (cc->chainid == chains[k].chainid)
2148 /* Add the number for this chain identifier if there are multiple copies */
2152 sprintf(buf, "%d", nid_used+1);
2153 strcat(suffix, buf);
2156 if (strlen(suffix) > 0)
2158 sprintf(molname, "%s%s", p_restype, suffix);
2162 strcpy(molname, p_restype);
2166 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2169 strcpy(itp_fn, top_fn);
2170 printf("Chain time...\n");
2171 c = strrchr(itp_fn, '.');
2172 sprintf(c, "_%s.itp", molname);
2173 c = strrchr(posre_fn, '.');
2174 sprintf(c, "_%s.itp", molname);
2175 if (strcmp(itp_fn, posre_fn) == 0)
2177 strcpy(buf_fn, posre_fn);
2178 c = strrchr(buf_fn, '.');
2180 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2184 srenew(incls, nincl);
2185 incls[nincl-1] = gmx_strdup(itp_fn);
2186 itp_file = gmx_fio_fopen(itp_fn, "w");
2193 srenew(mols, nmol+1);
2196 mols[nmol].name = gmx_strdup("SOL");
2197 mols[nmol].nr = pdba->nres;
2201 mols[nmol].name = gmx_strdup(molname);
2208 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2213 top_file2 = nullptr;
2217 top_file2 = itp_file;
2221 top_file2 = top_file;
2224 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2226 restp_chain, hb_chain,
2228 bVsites, bVsiteAromatics, ffdir,
2229 mHmult, nssbonds, ssbonds,
2230 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2231 bRenumRes, bRTPresname);
2235 write_posres(posre_fn, pdba, posre_fc);
2240 gmx_fio_fclose(itp_file);
2243 /* pdba and natom have been reassigned somewhere so: */
2249 if (watermodel == nullptr)
2251 for (chain = 0; chain < nch; chain++)
2253 if (chains[chain].bAllWat)
2255 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2261 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2262 if (!fflib_fexist(buf_fn))
2264 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2265 buf_fn, watermodel);
2269 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2270 gmx_fio_fclose(top_file);
2272 gmx_residuetype_destroy(rt);
2274 /* now merge all chains back together */
2277 for (i = 0; (i < nch); i++)
2279 natom += chains[i].pdba->nr;
2280 nres += chains[i].pdba->nres;
2283 init_t_atoms(atoms, natom, FALSE);
2284 for (i = 0; i < atoms->nres; i++)
2286 sfree(atoms->resinfo[i].name);
2288 sfree(atoms->resinfo);
2290 snew(atoms->resinfo, nres);
2294 for (i = 0; (i < nch); i++)
2298 printf("Including chain %d in system: %d atoms %d residues\n",
2299 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2301 for (j = 0; (j < chains[i].pdba->nr); j++)
2303 atoms->atom[k] = chains[i].pdba->atom[j];
2304 atoms->atom[k].resind += l; /* l is processed nr of residues */
2305 atoms->atomname[k] = chains[i].pdba->atomname[j];
2306 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2307 copy_rvec(chains[i].x[j], x[k]);
2310 for (j = 0; (j < chains[i].pdba->nres); j++)
2312 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2315 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2323 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2324 print_sums(atoms, TRUE);
2327 fprintf(stderr, "\nWriting coordinate file...\n");
2328 clear_rvec(box_space);
2331 make_new_box(atoms->nr, x, box, box_space, FALSE);
2333 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, nullptr, ePBC, box);
2335 printf("\t\t--------- PLEASE NOTE ------------\n");
2336 printf("You have successfully generated a topology from: %s.\n",
2337 opt2fn("-f", NFILE, fnm));
2338 if (watermodel != nullptr)
2340 printf("The %s force field and the %s water model are used.\n",
2341 ffname, watermodel);
2345 printf("The %s force field is used.\n",
2348 printf("\t\t--------- ETON ESAELP ------------\n");