2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/fileio/strdb.h"
51 #include "gromacs/gmxlib/conformation-utilities.h"
52 #include "gromacs/gmxpreprocess/fflibutil.h"
53 #include "gromacs/gmxpreprocess/genhydro.h"
54 #include "gromacs/gmxpreprocess/h_db.h"
55 #include "gromacs/gmxpreprocess/hizzie.h"
56 #include "gromacs/gmxpreprocess/pdb2top.h"
57 #include "gromacs/gmxpreprocess/pgutil.h"
58 #include "gromacs/gmxpreprocess/resall.h"
59 #include "gromacs/gmxpreprocess/specbond.h"
60 #include "gromacs/gmxpreprocess/ter_db.h"
61 #include "gromacs/gmxpreprocess/toputil.h"
62 #include "gromacs/gmxpreprocess/xlate.h"
63 #include "gromacs/legacyheaders/copyrite.h"
64 #include "gromacs/legacyheaders/macros.h"
65 #include "gromacs/legacyheaders/readinp.h"
66 #include "gromacs/legacyheaders/typedefs.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/topology/atomprop.h"
69 #include "gromacs/topology/block.h"
70 #include "gromacs/topology/index.h"
71 #include "gromacs/topology/residuetypes.h"
72 #include "gromacs/topology/symtab.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/fatalerror.h"
75 #include "gromacs/utility/futil.h"
76 #include "gromacs/utility/smalloc.h"
87 static const char *res2bb_notermini(const char *name,
88 int nrr, const rtprename_t *rr)
90 /* NOTE: This function returns the main building block name,
91 * it does not take terminal renaming into account.
96 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
101 return (i < nrr ? rr[i].main : name);
104 static const char *select_res(int nr, int resnr,
105 const char *name[], const char *expl[],
107 int nrr, const rtprename_t *rr)
111 printf("Which %s type do you want for residue %d\n", title, resnr+1);
112 for (sel = 0; (sel < nr); sel++)
114 printf("%d. %s (%s)\n",
115 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
117 printf("\nType a number:"); fflush(stdout);
119 if (scanf("%d", &sel) != 1)
121 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
127 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
132 const char *lh[easpNR] = { "ASP", "ASPH" };
133 const char *expl[easpNR] = {
134 "Not protonated (charge -1)",
135 "Protonated (charge 0)"
138 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
141 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
146 const char *lh[egluNR] = { "GLU", "GLUH" };
147 const char *expl[egluNR] = {
148 "Not protonated (charge -1)",
149 "Protonated (charge 0)"
152 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
155 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
160 const char *lh[eglnNR] = { "GLN", "QLN" };
161 const char *expl[eglnNR] = {
162 "Not protonated (charge 0)",
163 "Protonated (charge +1)"
166 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
169 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
174 const char *lh[elysNR] = { "LYSN", "LYS" };
175 const char *expl[elysNR] = {
176 "Not protonated (charge 0)",
177 "Protonated (charge +1)"
180 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
183 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
188 const char *lh[eargNR] = { "ARGN", "ARG" };
189 const char *expl[eargNR] = {
190 "Not protonated (charge 0)",
191 "Protonated (charge +1)"
194 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
197 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
199 const char *expl[ehisNR] = {
206 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
209 static void read_rtprename(const char *fname, FILE *fp,
210 int *nrtprename, rtprename_t **rtprename)
212 char line[STRLEN], buf[STRLEN];
221 while (get_a_line(fp, line, STRLEN))
224 nc = sscanf(line, "%s %s %s %s %s %s",
225 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
228 if (nc != 2 && nc != 5)
230 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
236 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
241 /* This file does not have special termini names, copy them from main */
242 strcpy(rr[n].nter, rr[n].main);
243 strcpy(rr[n].cter, rr[n].main);
244 strcpy(rr[n].bter, rr[n].main);
254 static char *search_resrename(int nrr, rtprename_t *rr,
256 gmx_bool bStart, gmx_bool bEnd,
257 gmx_bool bCompareFFRTPname)
265 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
266 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
271 /* If found in the database, rename this residue's rtp building block,
272 * otherwise keep the old name.
295 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
303 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
304 int nrr, rtprename_t *rr, t_symtab *symtab,
308 gmx_bool bStart, bEnd;
310 gmx_bool bFFRTPTERRNM;
312 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
314 for (r = 0; r < pdba->nres; r++)
318 for (j = 0; j < nterpairs; j++)
325 for (j = 0; j < nterpairs; j++)
333 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
335 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
337 /* This is a terminal residue, but the residue name,
338 * currently stored in .rtp, is not a standard residue name,
339 * but probably a force field specific rtp name.
340 * Check if we need to rename it because it is terminal.
342 nn = search_resrename(nrr, rr,
343 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
346 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
350 printf("Changing rtp entry of residue %d %s to '%s'\n",
351 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
353 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
358 static void pdbres_to_gmxrtp(t_atoms *pdba)
362 for (i = 0; (i < pdba->nres); i++)
364 if (pdba->resinfo[i].rtp == NULL)
366 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
371 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
372 gmx_bool bFullCompare, t_symtab *symtab)
377 for (i = 0; (i < pdba->nres); i++)
379 resnm = *pdba->resinfo[i].name;
380 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
381 (!bFullCompare && strstr(resnm, oldnm) != NULL))
383 /* Rename the residue name (not the rtp name) */
384 pdba->resinfo[i].name = put_symtab(symtab, newnm);
389 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
390 gmx_bool bFullCompare, t_symtab *symtab)
395 for (i = 0; (i < pdba->nres); i++)
397 /* We have not set the rtp name yes, use the residue name */
398 bbnm = *pdba->resinfo[i].name;
399 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
400 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
402 /* Change the rtp builing block name */
403 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
408 static void rename_bbint(t_atoms *pdba, const char *oldnm,
409 const char *gettp(int, int, const rtprename_t *),
410 gmx_bool bFullCompare,
412 int nrr, const rtprename_t *rr)
418 for (i = 0; i < pdba->nres; i++)
420 /* We have not set the rtp name yes, use the residue name */
421 bbnm = *pdba->resinfo[i].name;
422 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
423 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
425 ptr = gettp(i, nrr, rr);
426 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
431 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
437 ftp = fn2ftp(filename);
438 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
440 fprintf(stderr, "No occupancies in %s\n", filename);
444 for (i = 0; (i < atoms->nr); i++)
446 if (atoms->pdbinfo[i].occup != 1)
450 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
451 *atoms->resinfo[atoms->atom[i].resind].name,
452 atoms->resinfo[ atoms->atom[i].resind].nr,
454 atoms->pdbinfo[i].occup);
456 if (atoms->pdbinfo[i].occup == 0)
466 if (nzero == atoms->nr)
468 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
470 else if ((nzero > 0) || (nnotone > 0))
474 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
475 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
477 nzero, nnotone, atoms->nr);
481 fprintf(stderr, "All occupancies are one\n");
486 void write_posres(char *fn, t_atoms *pdba, real fc)
491 fp = gmx_fio_fopen(fn, "w");
493 "; In this topology include file, you will find position restraint\n"
494 "; entries for all the heavy atoms in your original pdb file.\n"
495 "; This means that all the protons which were added by pdb2gmx are\n"
496 "; not restrained.\n"
498 "[ position_restraints ]\n"
499 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
501 for (i = 0; (i < pdba->nr); i++)
503 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
505 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
511 static int read_pdball(const char *inf, const char *outf, char *title,
512 t_atoms *atoms, rvec **x,
513 int *ePBC, matrix box, gmx_bool bRemoveH,
514 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
515 gmx_atomprop_t aps, gmx_bool bVerbose)
516 /* Read a pdb file. (containing proteins) */
518 int natom, new_natom, i;
521 printf("Reading %s...\n", inf);
522 get_stx_coordnum(inf, &natom);
523 init_t_atoms(atoms, natom, TRUE);
525 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
526 if (fn2ftp(inf) == efPDB)
528 get_pdb_atomnumber(atoms, aps);
533 for (i = 0; i < atoms->nr; i++)
535 if (!is_hydrogen(*atoms->atomname[i]))
537 atoms->atom[new_natom] = atoms->atom[i];
538 atoms->atomname[new_natom] = atoms->atomname[i];
539 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
540 copy_rvec((*x)[i], (*x)[new_natom]);
544 atoms->nr = new_natom;
549 if (title && title[0])
551 printf(" '%s',", title);
553 printf(" %d atoms\n", natom);
555 /* Rename residues */
556 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
557 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
558 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
560 rename_atoms("xlateat.dat", NULL,
561 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
570 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
576 void process_chain(t_atoms *pdba, rvec *x,
577 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
578 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
579 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
580 real angle, real distance, t_symtab *symtab,
581 int nrr, const rtprename_t *rr)
583 /* Rename aromatics, lys, asp and histidine */
586 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
590 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
594 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
598 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
602 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
606 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
610 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
614 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
618 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
622 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
627 set_histp(pdba, x, angle, distance);
631 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
634 /* Initialize the rtp builing block names with the residue names
635 * for the residues that have not been processed above.
637 pdbres_to_gmxrtp(pdba);
639 /* Now we have all rtp names set.
640 * The rtp names will conform to Gromacs naming,
641 * unless the input pdb file contained one or more force field specific
642 * rtp names as residue names.
646 /* struct for sorting the atoms from the pdb file */
648 int resnr; /* residue number */
649 int j; /* database order index */
650 int index; /* original atom number */
651 char anm1; /* second letter of atom name */
652 char altloc; /* alternate location indicator */
655 int pdbicomp(const void *a, const void *b)
660 pa = (t_pdbindex *)a;
661 pb = (t_pdbindex *)b;
663 d = (pa->resnr - pb->resnr);
669 d = (pa->anm1 - pb->anm1);
672 d = (pa->altloc - pb->altloc);
680 static void sort_pdbatoms(t_restp restp[],
681 int natoms, t_atoms **pdbaptr, rvec **x,
682 t_blocka *block, char ***gnames)
684 t_atoms *pdba, *pdbnew;
699 for (i = 0; i < natoms; i++)
701 atomnm = *pdba->atomname[i];
702 rptr = &restp[pdba->atom[i].resind];
703 for (j = 0; (j < rptr->natom); j++)
705 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
710 if (j == rptr->natom)
715 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
716 "while sorting atoms.\n%s", atomnm,
717 *pdba->resinfo[pdba->atom[i].resind].name,
718 pdba->resinfo[pdba->atom[i].resind].nr,
721 is_hydrogen(atomnm) ?
722 "\nFor a hydrogen, this can be a different protonation state, or it\n"
723 "might have had a different number in the PDB file and was rebuilt\n"
724 "(it might for instance have been H3, and we only expected H1 & H2).\n"
725 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
726 "Remove this hydrogen or choose a different protonation state to solve it.\n"
727 "Option -ignh will ignore all hydrogens in the input." : ".");
728 gmx_fatal(FARGS, buf);
730 /* make shadow array to be sorted into indexgroup */
731 pdbi[i].resnr = pdba->atom[i].resind;
734 pdbi[i].anm1 = atomnm[1];
735 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
737 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
739 /* pdba is sorted in pdbnew using the pdbi index */
742 init_t_atoms(pdbnew, natoms, TRUE);
744 pdbnew->nr = pdba->nr;
745 pdbnew->nres = pdba->nres;
746 sfree(pdbnew->resinfo);
747 pdbnew->resinfo = pdba->resinfo;
748 for (i = 0; i < natoms; i++)
750 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
751 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
752 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
753 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
754 /* make indexgroup in block */
755 a[i] = pdbi[i].index;
758 sfree(pdba->atomname);
760 sfree(pdba->pdbinfo);
763 /* copy the sorted pdbnew back to pdba */
766 add_grp(block, gnames, natoms, a, "prot_sort");
772 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
774 int i, j, oldnatoms, ndel;
777 printf("Checking for duplicate atoms....\n");
778 oldnatoms = pdba->nr;
780 /* NOTE: pdba->nr is modified inside the loop */
781 for (i = 1; (i < pdba->nr); i++)
783 /* compare 'i' and 'i-1', throw away 'i' if they are identical
784 this is a 'while' because multiple alternate locations can be present */
785 while ( (i < pdba->nr) &&
786 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
787 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
792 ri = &pdba->resinfo[pdba->atom[i].resind];
793 printf("deleting duplicate atom %4s %s%4d%c",
794 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
795 if (ri->chainid && (ri->chainid != ' '))
797 printf(" ch %c", ri->chainid);
801 if (pdba->pdbinfo[i].atomnr)
803 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
805 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
807 printf(" altloc %c", pdba->pdbinfo[i].altloc);
813 /* We can not free, since it might be in the symtab */
814 /* sfree(pdba->atomname[i]); */
815 for (j = i; j < pdba->nr; j++)
817 pdba->atom[j] = pdba->atom[j+1];
818 pdba->atomname[j] = pdba->atomname[j+1];
819 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
820 copy_rvec(x[j+1], x[j]);
822 srenew(pdba->atom, pdba->nr);
823 /* srenew(pdba->atomname, pdba->nr); */
824 srenew(pdba->pdbinfo, pdba->nr);
827 if (pdba->nr != oldnatoms)
829 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
835 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
836 gmx_residuetype_t *rt)
839 const char *p_startrestype;
840 const char *p_restype;
841 int nstartwarn, nendwarn;
849 /* Find the starting terminus (typially N or 5') */
850 for (i = r0; i < r1 && *r_start == -1; i++)
852 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
853 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
855 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
862 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
866 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
874 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
875 for (i = *r_start; i < r1; i++)
877 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
878 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
886 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
887 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
888 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
892 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
901 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
907 modify_chain_numbers(t_atoms * pdba,
908 const char * chainsep)
911 char old_prev_chainid;
912 char old_this_chainid;
913 int old_prev_chainnum;
914 int old_this_chainnum;
920 const char * prev_atomname;
921 const char * this_atomname;
922 const char * prev_resname;
923 const char * this_resname;
928 int prev_chainnumber;
929 int this_chainnumber;
941 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
943 /* Be a bit flexible to catch typos */
944 if (!strncmp(chainsep, "id_o", 4))
946 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
947 splitting = SPLIT_ID_OR_TER;
948 printf("Splitting chemical chains based on TER records or chain id changing.\n");
950 else if (!strncmp(chainsep, "int", 3))
952 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
953 splitting = SPLIT_INTERACTIVE;
954 printf("Splitting chemical chains interactively.\n");
956 else if (!strncmp(chainsep, "id_a", 4))
958 splitting = SPLIT_ID_AND_TER;
959 printf("Splitting chemical chains based on TER records and chain id changing.\n");
961 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
963 splitting = SPLIT_ID_ONLY;
964 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
966 else if (chainsep[0] == 't')
968 splitting = SPLIT_TER_ONLY;
969 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
973 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
976 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
978 old_prev_chainid = '?';
979 old_prev_chainnum = -1;
982 this_atomname = NULL;
987 this_chainnumber = -1;
989 for (i = 0; i < pdba->nres; i++)
991 ri = &pdba->resinfo[i];
992 old_this_chainid = ri->chainid;
993 old_this_chainnum = ri->chainnum;
995 prev_atomname = this_atomname;
996 prev_atomnum = this_atomnum;
997 prev_resname = this_resname;
998 prev_resnum = this_resnum;
999 prev_chainid = this_chainid;
1000 prev_chainnumber = this_chainnumber;
1002 this_atomname = *(pdba->atomname[i]);
1003 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1004 this_resname = *ri->name;
1005 this_resnum = ri->nr;
1006 this_chainid = ri->chainid;
1007 this_chainnumber = ri->chainnum;
1011 case SPLIT_ID_OR_TER:
1012 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1018 case SPLIT_ID_AND_TER:
1019 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1026 if (old_this_chainid != old_prev_chainid)
1032 case SPLIT_TER_ONLY:
1033 if (old_this_chainnum != old_prev_chainnum)
1038 case SPLIT_INTERACTIVE:
1039 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1043 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1044 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1045 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1046 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1048 if (NULL == fgets(select, STRLEN-1, stdin))
1050 gmx_fatal(FARGS, "Error reading from stdin");
1053 if (i == 0 || select[0] == 'y')
1060 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1063 old_prev_chainid = old_this_chainid;
1064 old_prev_chainnum = old_this_chainnum;
1066 ri->chainnum = new_chainnum;
1095 int gmx_pdb2gmx(int argc, char *argv[])
1097 const char *desc[] = {
1098 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1099 "some database files, adds hydrogens to the molecules and generates",
1100 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1101 "and a topology in GROMACS format.",
1102 "These files can subsequently be processed to generate a run input file.",
1104 "[THISMODULE] will search for force fields by looking for",
1105 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1106 "of the current working directory and of the GROMACS library directory",
1107 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1109 "By default the forcefield selection is interactive,",
1110 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1111 "in the list on the command line instead. In that case [THISMODULE] just looks",
1112 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1114 "After choosing a force field, all files will be read only from",
1115 "the corresponding force field directory.",
1116 "If you want to modify or add a residue types, you can copy the force",
1117 "field directory from the GROMACS library directory to your current",
1118 "working directory. If you want to add new protein residue types,",
1119 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1120 "or copy the whole library directory to a local directory and set",
1121 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1122 "Check Chapter 5 of the manual for more information about file formats.",
1125 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1126 "need not necessarily contain a protein structure. Every kind of",
1127 "molecule for which there is support in the database can be converted.",
1128 "If there is no support in the database, you can add it yourself.[PAR]",
1130 "The program has limited intelligence, it reads a number of database",
1131 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1132 "if necessary this can be done manually. The program can prompt the",
1133 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1134 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1135 "protonated (three protons, default), for Asp and Glu unprotonated",
1136 "(default) or protonated, for His the proton can be either on ND1,",
1137 "on NE2 or on both. By default these selections are done automatically.",
1138 "For His, this is based on an optimal hydrogen bonding",
1139 "conformation. Hydrogen bonds are defined based on a simple geometric",
1140 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1141 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1142 "[TT]-dist[tt] respectively.[PAR]",
1144 "The protonation state of N- and C-termini can be chosen interactively",
1145 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1146 "respectively. Some force fields support zwitterionic forms for chains of",
1147 "one residue, but for polypeptides these options should NOT be selected.",
1148 "The AMBER force fields have unique forms for the terminal residues,",
1149 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1150 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1151 "respectively to use these forms, making sure you preserve the format",
1152 "of the coordinate file. Alternatively, use named terminating residues",
1153 "(e.g. ACE, NME).[PAR]",
1155 "The separation of chains is not entirely trivial since the markup",
1156 "in user-generated PDB files frequently varies and sometimes it",
1157 "is desirable to merge entries across a TER record, for instance",
1158 "if you want a disulfide bridge or distance restraints between",
1159 "two protein chains or if you have a HEME group bound to a protein.",
1160 "In such cases multiple chains should be contained in a single",
1161 "[TT]moleculetype[tt] definition.",
1162 "To handle this, [THISMODULE] uses two separate options.",
1163 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1164 "start, and termini added when applicable. This can be done based on the",
1165 "existence of TER records, when the chain id changes, or combinations of either",
1166 "or both of these. You can also do the selection fully interactively.",
1167 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1168 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1169 "This can be turned off (no merging), all non-water chains can be merged into a",
1170 "single molecule, or the selection can be done interactively.[PAR]",
1172 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1173 "If any of the occupancies are not one, indicating that the atom is",
1174 "not resolved well in the structure, a warning message is issued.",
1175 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1176 "all occupancy fields may be zero. Either way, it is up to the user",
1177 "to verify the correctness of the input data (read the article!).[PAR]",
1179 "During processing the atoms will be reordered according to GROMACS",
1180 "conventions. With [TT]-n[tt] an index file can be generated that",
1181 "contains one group reordered in the same way. This allows you to",
1182 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1183 "one limitation: reordering is done after the hydrogens are stripped",
1184 "from the input and before new hydrogens are added. This means that",
1185 "you should not use [TT]-ignh[tt].[PAR]",
1187 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1188 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1189 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1192 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1193 "motions. Angular and out-of-plane motions can be removed by changing",
1194 "hydrogens into virtual sites and fixing angles, which fixes their",
1195 "position relative to neighboring atoms. Additionally, all atoms in the",
1196 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1197 "can be converted into virtual sites, eliminating the fast improper dihedral",
1198 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1199 "atoms are also converted to virtual sites. The mass of all atoms that are",
1200 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1201 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1202 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1203 "done for water hydrogens to slow down the rotational motion of water.",
1204 "The increase in mass of the hydrogens is subtracted from the bonded",
1205 "(heavy) atom so that the total mass of the system remains the same."
1209 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1211 t_atoms pdba_all, *pdba;
1215 int chain, nch, maxch, nwaterchain;
1217 t_chain *chains, *cc;
1218 char select[STRLEN];
1226 int i, j, k, l, nrtp;
1227 int *swap_index, si;
1231 gpp_atomtype_t atype;
1232 gmx_residuetype_t*rt;
1234 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1235 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1236 char *c, forcefield[STRLEN], ffdir[STRLEN];
1237 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1245 int nrtprename, naa;
1246 rtprename_t *rtprename = NULL;
1247 int nah, nNtdb, nCtdb, ntdblist;
1248 t_hackblock *ntdb, *ctdb, **tdblist;
1252 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1254 t_hackblock *hb_chain;
1255 t_restp *restp_chain;
1257 const char *p_restype;
1261 const char * prev_atomname;
1262 const char * this_atomname;
1263 const char * prev_resname;
1264 const char * this_resname;
1269 int prev_chainnumber;
1270 int this_chainnumber;
1272 int this_chainstart;
1273 int prev_chainstart;
1280 { efSTX, "-f", "eiwit.pdb", ffREAD },
1281 { efSTO, "-o", "conf", ffWRITE },
1282 { efTOP, NULL, NULL, ffWRITE },
1283 { efITP, "-i", "posre", ffWRITE },
1284 { efNDX, "-n", "clean", ffOPTWR },
1285 { efSTO, "-q", "clean.pdb", ffOPTWR }
1287 #define NFILE asize(fnm)
1290 /* Command line arguments must be static */
1291 static gmx_bool bNewRTP = FALSE;
1292 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1293 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1294 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1295 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1296 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1297 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1298 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1299 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1300 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1301 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1302 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1303 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1304 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1305 static const char *ff = "select";
1308 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1309 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1310 { "-lb", FALSE, etREAL, {&long_bond_dist},
1311 "HIDDENLong bond warning distance" },
1312 { "-sb", FALSE, etREAL, {&short_bond_dist},
1313 "HIDDENShort bond warning distance" },
1314 { "-chainsep", FALSE, etENUM, {chainsep},
1315 "Condition in PDB files when a new chain should be started (adding termini)" },
1316 { "-merge", FALSE, etENUM, {&merge},
1317 "Merge multiple chains into a single [moleculetype]" },
1318 { "-ff", FALSE, etSTR, {&ff},
1319 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1320 { "-water", FALSE, etENUM, {watstr},
1321 "Water model to use" },
1322 { "-inter", FALSE, etBOOL, {&bInter},
1323 "Set the next 8 options to interactive"},
1324 { "-ss", FALSE, etBOOL, {&bCysMan},
1325 "Interactive SS bridge selection" },
1326 { "-ter", FALSE, etBOOL, {&bTerMan},
1327 "Interactive termini selection, instead of charged (default)" },
1328 { "-lys", FALSE, etBOOL, {&bLysMan},
1329 "Interactive lysine selection, instead of charged" },
1330 { "-arg", FALSE, etBOOL, {&bArgMan},
1331 "Interactive arginine selection, instead of charged" },
1332 { "-asp", FALSE, etBOOL, {&bAspMan},
1333 "Interactive aspartic acid selection, instead of charged" },
1334 { "-glu", FALSE, etBOOL, {&bGluMan},
1335 "Interactive glutamic acid selection, instead of charged" },
1336 { "-gln", FALSE, etBOOL, {&bGlnMan},
1337 "Interactive glutamine selection, instead of neutral" },
1338 { "-his", FALSE, etBOOL, {&bHisMan},
1339 "Interactive histidine selection, instead of checking H-bonds" },
1340 { "-angle", FALSE, etREAL, {&angle},
1341 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1342 { "-dist", FALSE, etREAL, {&distance},
1343 "Maximum donor-acceptor distance for a H-bond (nm)" },
1344 { "-una", FALSE, etBOOL, {&bUnA},
1345 "Select aromatic rings with united CH atoms on phenylalanine, "
1346 "tryptophane and tyrosine" },
1347 { "-sort", FALSE, etBOOL, {&bSort},
1348 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1349 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1350 "Ignore hydrogen atoms that are in the coordinate file" },
1351 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1352 "Continue when atoms are missing, dangerous" },
1353 { "-v", FALSE, etBOOL, {&bVerbose},
1354 "Be slightly more verbose in messages" },
1355 { "-posrefc", FALSE, etREAL, {&posre_fc},
1356 "Force constant for position restraints" },
1357 { "-vsite", FALSE, etENUM, {vsitestr},
1358 "Convert atoms to virtual sites" },
1359 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1360 "Make hydrogen atoms heavy" },
1361 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1362 "Change the mass of hydrogens to 2 amu" },
1363 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1364 "Use charge groups in the [TT].rtp[tt] file" },
1365 { "-cmap", TRUE, etBOOL, {&bCmap},
1366 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1367 { "-renum", TRUE, etBOOL, {&bRenumRes},
1368 "Renumber the residues consecutively in the output" },
1369 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1370 "Use [TT].rtp[tt] entry names as residue names" }
1372 #define NPARGS asize(pa)
1374 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1380 /* Force field selection, interactive or direct */
1381 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1382 forcefield, sizeof(forcefield),
1383 ffdir, sizeof(ffdir));
1385 if (strlen(forcefield) > 0)
1387 strcpy(ffname, forcefield);
1388 ffname[0] = toupper(ffname[0]);
1392 gmx_fatal(FARGS, "Empty forcefield string");
1395 printf("\nUsing the %s force field in directory %s\n\n",
1398 choose_watermodel(watstr[0], ffdir, &watermodel);
1402 /* if anything changes here, also change description of -inter */
1417 else if (bDeuterate)
1426 switch (vsitestr[0][0])
1428 case 'n': /* none */
1430 bVsiteAromatics = FALSE;
1432 case 'h': /* hydrogens */
1434 bVsiteAromatics = FALSE;
1436 case 'a': /* aromatics */
1438 bVsiteAromatics = TRUE;
1441 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1442 __FILE__, __LINE__, vsitestr[0]);
1445 /* Open the symbol table */
1446 open_symtab(&symtab);
1448 /* Residue type database */
1449 gmx_residuetype_init(&rt);
1451 /* Read residue renaming database(s), if present */
1452 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1456 for (i = 0; i < nrrn; i++)
1458 fp = fflib_open(rrn[i]);
1459 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1465 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1467 for (i = 0; i < nrtprename; i++)
1469 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1471 /* Only add names if the 'standard' gromacs/iupac base name was found */
1474 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1475 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1476 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1477 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1482 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1483 strstr(watermodel, "4P")))
1487 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1488 strstr(watermodel, "5P")))
1497 aps = gmx_atomprop_init();
1498 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1499 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1504 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1507 printf("Analyzing pdb file\n");
1512 modify_chain_numbers(&pdba_all, chainsep[0]);
1516 this_atomname = NULL;
1518 this_resname = NULL;
1521 this_chainnumber = -1;
1522 this_chainstart = 0;
1523 /* Keep the compiler happy */
1524 prev_chainstart = 0;
1529 for (i = 0; (i < natom); i++)
1531 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1533 prev_atomname = this_atomname;
1534 prev_atomnum = this_atomnum;
1535 prev_resname = this_resname;
1536 prev_resnum = this_resnum;
1537 prev_chainid = this_chainid;
1538 prev_chainnumber = this_chainnumber;
1541 prev_chainstart = this_chainstart;
1544 this_atomname = *pdba_all.atomname[i];
1545 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1546 this_resname = *ri->name;
1547 this_resnum = ri->nr;
1548 this_chainid = ri->chainid;
1549 this_chainnumber = ri->chainnum;
1551 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1552 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1554 this_chainstart = pdba_all.atom[i].resind;
1559 if (!strncmp(merge[0], "int", 3))
1561 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1562 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1563 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1564 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1566 if (NULL == fgets(select, STRLEN-1, stdin))
1568 gmx_fatal(FARGS, "Error reading from stdin");
1570 bMerged = (select[0] == 'y');
1572 else if (!strncmp(merge[0], "all", 3))
1580 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1581 pdba_all.atom[i].resind - prev_chainstart;
1582 pdb_ch[nch-1].nterpairs++;
1583 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1588 /* set natom for previous chain */
1591 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1598 /* check if chain identifier was used before */
1599 for (j = 0; (j < nch); j++)
1601 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1603 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1604 "They will be treated as separate chains unless you reorder your file.\n",
1611 srenew(pdb_ch, maxch);
1613 pdb_ch[nch].chainid = ri->chainid;
1614 pdb_ch[nch].chainnum = ri->chainnum;
1615 pdb_ch[nch].start = i;
1616 pdb_ch[nch].bAllWat = bWat;
1619 pdb_ch[nch].nterpairs = 0;
1623 pdb_ch[nch].nterpairs = 1;
1625 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1626 /* modified [nch] to [0] below */
1627 pdb_ch[nch].chainstart[0] = 0;
1633 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1635 /* set all the water blocks at the end of the chain */
1636 snew(swap_index, nch);
1638 for (i = 0; i < nch; i++)
1640 if (!pdb_ch[i].bAllWat)
1646 for (i = 0; i < nch; i++)
1648 if (pdb_ch[i].bAllWat)
1654 if (nwaterchain > 1)
1656 printf("Moved all the water blocks to the end\n");
1660 /* copy pdb data and x for all chains */
1661 for (i = 0; (i < nch); i++)
1664 chains[i].chainid = pdb_ch[si].chainid;
1665 chains[i].chainnum = pdb_ch[si].chainnum;
1666 chains[i].bAllWat = pdb_ch[si].bAllWat;
1667 chains[i].nterpairs = pdb_ch[si].nterpairs;
1668 chains[i].chainstart = pdb_ch[si].chainstart;
1669 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1670 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1671 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1672 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1674 snew(chains[i].pdba, 1);
1675 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1676 snew(chains[i].x, chains[i].pdba->nr);
1677 for (j = 0; j < chains[i].pdba->nr; j++)
1679 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1680 snew(chains[i].pdba->atomname[j], 1);
1681 *chains[i].pdba->atomname[j] =
1682 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1683 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1684 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1686 /* Re-index the residues assuming that the indices are continuous */
1687 k = chains[i].pdba->atom[0].resind;
1688 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1689 chains[i].pdba->nres = nres;
1690 for (j = 0; j < chains[i].pdba->nr; j++)
1692 chains[i].pdba->atom[j].resind -= k;
1694 srenew(chains[i].pdba->resinfo, nres);
1695 for (j = 0; j < nres; j++)
1697 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1698 snew(chains[i].pdba->resinfo[j].name, 1);
1699 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1700 /* make all chain identifiers equal to that of the chain */
1701 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1705 if (nchainmerges > 0)
1707 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1711 printf("There are %d chains and %d blocks of water and "
1712 "%d residues with %d atoms\n",
1713 nch-nwaterchain, nwaterchain,
1714 pdba_all.nres, natom);
1716 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1717 for (i = 0; (i < nch); i++)
1719 printf(" %d '%c' %5d %6d %s\n",
1720 i+1, chains[i].chainid ? chains[i].chainid : '-',
1721 chains[i].pdba->nres, chains[i].pdba->nr,
1722 chains[i].bAllWat ? "(only water)" : "");
1726 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1728 /* Read atomtypes... */
1729 atype = read_atype(ffdir, &symtab);
1731 /* read residue database */
1732 printf("Reading residue database... (%s)\n", forcefield);
1733 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1736 for (i = 0; i < nrtpf; i++)
1738 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1744 /* Not correct with multiple rtp input files with different bonded types */
1745 fp = gmx_fio_fopen("new.rtp", "w");
1746 print_resall(fp, nrtp, restp, atype);
1750 /* read hydrogen database */
1751 nah = read_h_db(ffdir, &ah);
1753 /* Read Termini database... */
1754 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1755 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1757 top_fn = ftp2fn(efTOP, NFILE, fnm);
1758 top_file = gmx_fio_fopen(top_fn, "w");
1760 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1767 for (chain = 0; (chain < nch); chain++)
1769 cc = &(chains[chain]);
1771 /* set pdba, natom and nres to the current chain */
1774 natom = cc->pdba->nr;
1775 nres = cc->pdba->nres;
1777 if (cc->chainid && ( cc->chainid != ' ' ) )
1779 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1780 chain+1, cc->chainid, natom, nres);
1784 printf("Processing chain %d (%d atoms, %d residues)\n",
1785 chain+1, natom, nres);
1788 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1789 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1790 nrtprename, rtprename);
1792 cc->chainstart[cc->nterpairs] = pdba->nres;
1794 for (i = 0; i < cc->nterpairs; i++)
1796 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1797 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1799 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1805 if (cc->nterpairs == 0)
1807 printf("Problem with chain definition, or missing terminal residues.\n"
1808 "This chain does not appear to contain a recognized chain molecule.\n"
1809 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1812 /* Check for disulfides and other special bonds */
1813 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1817 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1825 sprintf(fn, "chain.pdb");
1829 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1831 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1835 for (i = 0; i < cc->nterpairs; i++)
1839 * We first apply a filter so we only have the
1840 * termini that can be applied to the residue in question
1841 * (or a generic terminus if no-residue specific is available).
1843 /* First the N terminus */
1846 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1847 *pdba->resinfo[cc->r_start[i]].name,
1848 *pdba->resinfo[cc->r_start[i]].rtp,
1852 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1853 "is already in a terminus-specific form and skipping terminus selection.\n");
1858 if (bTerMan && ntdblist > 1)
1860 sprintf(select, "Select start terminus type for %s-%d",
1861 *pdba->resinfo[cc->r_start[i]].name,
1862 pdba->resinfo[cc->r_start[i]].nr);
1863 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1867 cc->ntdb[i] = tdblist[0];
1870 printf("Start terminus %s-%d: %s\n",
1871 *pdba->resinfo[cc->r_start[i]].name,
1872 pdba->resinfo[cc->r_start[i]].nr,
1873 (cc->ntdb[i])->name);
1882 /* And the C terminus */
1885 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1886 *pdba->resinfo[cc->r_end[i]].name,
1887 *pdba->resinfo[cc->r_end[i]].rtp,
1891 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1892 "is already in a terminus-specific form and skipping terminus selection.\n");
1897 if (bTerMan && ntdblist > 1)
1899 sprintf(select, "Select end terminus type for %s-%d",
1900 *pdba->resinfo[cc->r_end[i]].name,
1901 pdba->resinfo[cc->r_end[i]].nr);
1902 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1906 cc->ctdb[i] = tdblist[0];
1908 printf("End terminus %s-%d: %s\n",
1909 *pdba->resinfo[cc->r_end[i]].name,
1910 pdba->resinfo[cc->r_end[i]].nr,
1911 (cc->ctdb[i])->name);
1920 /* lookup hackblocks and rtp for all residues */
1921 get_hackblocks_rtp(&hb_chain, &restp_chain,
1922 nrtp, restp, pdba->nres, pdba->resinfo,
1923 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1924 /* ideally, now we would not need the rtp itself anymore, but do
1925 everything using the hb and restp arrays. Unfortunately, that
1926 requires some re-thinking of code in gen_vsite.c, which I won't
1927 do now :( AF 26-7-99 */
1929 rename_atoms(NULL, ffdir,
1930 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1932 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1936 block = new_blocka();
1938 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1939 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1940 if (ftp2bSet(efNDX, NFILE, fnm))
1944 fprintf(stderr, "WARNING: with the -remh option the generated "
1945 "index file (%s) might be useless\n"
1946 "(the index file is generated before hydrogens are added)",
1947 ftp2fn(efNDX, NFILE, fnm));
1949 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1951 for (i = 0; i < block->nr; i++)
1960 fprintf(stderr, "WARNING: "
1961 "without sorting no check for duplicate atoms can be done\n");
1964 /* Generate Hydrogen atoms (and termini) in the sequence */
1965 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1966 natom = add_h(&pdba, &x, nah, ah,
1967 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1968 NULL, NULL, TRUE, FALSE);
1969 printf("Now there are %d residues with %d atoms\n",
1970 pdba->nres, pdba->nr);
1973 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1978 for (i = 0; (i < natom); i++)
1980 fprintf(debug, "Res %s%d atom %d %s\n",
1981 *(pdba->resinfo[pdba->atom[i].resind].name),
1982 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1986 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1988 /* make up molecule name(s) */
1990 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1992 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
1998 sprintf(molname, "Water");
2002 this_chainid = cc->chainid;
2004 /* Add the chain id if we have one */
2005 if (this_chainid != ' ')
2007 sprintf(buf, "_chain_%c", this_chainid);
2008 strcat(suffix, buf);
2011 /* Check if there have been previous chains with the same id */
2013 for (k = 0; k < chain; k++)
2015 if (cc->chainid == chains[k].chainid)
2020 /* Add the number for this chain identifier if there are multiple copies */
2024 sprintf(buf, "%d", nid_used+1);
2025 strcat(suffix, buf);
2028 if (strlen(suffix) > 0)
2030 sprintf(molname, "%s%s", p_restype, suffix);
2034 strcpy(molname, p_restype);
2038 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2041 strcpy(itp_fn, top_fn);
2042 printf("Chain time...\n");
2043 c = strrchr(itp_fn, '.');
2044 sprintf(c, "_%s.itp", molname);
2045 c = strrchr(posre_fn, '.');
2046 sprintf(c, "_%s.itp", molname);
2047 if (strcmp(itp_fn, posre_fn) == 0)
2049 strcpy(buf_fn, posre_fn);
2050 c = strrchr(buf_fn, '.');
2052 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2056 srenew(incls, nincl);
2057 incls[nincl-1] = gmx_strdup(itp_fn);
2058 itp_file = gmx_fio_fopen(itp_fn, "w");
2065 srenew(mols, nmol+1);
2068 mols[nmol].name = gmx_strdup("SOL");
2069 mols[nmol].nr = pdba->nres;
2073 mols[nmol].name = gmx_strdup(molname);
2080 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2090 top_file2 = itp_file;
2094 top_file2 = top_file;
2097 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2099 restp_chain, hb_chain,
2101 bVsites, bVsiteAromatics, ffdir,
2102 mHmult, nssbonds, ssbonds,
2103 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2104 bRenumRes, bRTPresname);
2108 write_posres(posre_fn, pdba, posre_fc);
2113 gmx_fio_fclose(itp_file);
2116 /* pdba and natom have been reassigned somewhere so: */
2122 if (cc->chainid == ' ')
2124 sprintf(fn, "chain.pdb");
2128 sprintf(fn, "chain_%c.pdb", cc->chainid);
2130 cool_quote(quote, 255, NULL);
2131 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2135 if (watermodel == NULL)
2137 for (chain = 0; chain < nch; chain++)
2139 if (chains[chain].bAllWat)
2141 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2147 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2148 if (!fflib_fexist(buf_fn))
2150 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2151 buf_fn, watermodel);
2155 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2156 gmx_fio_fclose(top_file);
2158 gmx_residuetype_destroy(rt);
2160 /* now merge all chains back together */
2163 for (i = 0; (i < nch); i++)
2165 natom += chains[i].pdba->nr;
2166 nres += chains[i].pdba->nres;
2169 init_t_atoms(atoms, natom, FALSE);
2170 for (i = 0; i < atoms->nres; i++)
2172 sfree(atoms->resinfo[i].name);
2174 sfree(atoms->resinfo);
2176 snew(atoms->resinfo, nres);
2180 for (i = 0; (i < nch); i++)
2184 printf("Including chain %d in system: %d atoms %d residues\n",
2185 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2187 for (j = 0; (j < chains[i].pdba->nr); j++)
2189 atoms->atom[k] = chains[i].pdba->atom[j];
2190 atoms->atom[k].resind += l; /* l is processed nr of residues */
2191 atoms->atomname[k] = chains[i].pdba->atomname[j];
2192 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2193 copy_rvec(chains[i].x[j], x[k]);
2196 for (j = 0; (j < chains[i].pdba->nres); j++)
2198 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2201 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2209 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2210 print_sums(atoms, TRUE);
2213 fprintf(stderr, "\nWriting coordinate file...\n");
2214 clear_rvec(box_space);
2217 make_new_box(atoms->nr, x, box, box_space, FALSE);
2219 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2221 printf("\t\t--------- PLEASE NOTE ------------\n");
2222 printf("You have successfully generated a topology from: %s.\n",
2223 opt2fn("-f", NFILE, fnm));
2224 if (watermodel != NULL)
2226 printf("The %s force field and the %s water model are used.\n",
2227 ffname, watermodel);
2231 printf("The %s force field is used.\n",
2234 printf("\t\t--------- ETON ESAELP ------------\n");