2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
48 #include "gromacs/legacyheaders/typedefs.h"
49 #include "gromacs/fileio/gmxfio.h"
50 #include "gromacs/legacyheaders/copyrite.h"
51 #include "gromacs/utility/cstringutil.h"
52 #include "gromacs/fileio/confio.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/utility/futil.h"
55 #include "gromacs/fileio/pdbio.h"
62 #include "gromacs/gmxlib/conformation-utilities.h"
64 #include "gromacs/legacyheaders/readinp.h"
65 #include "gromacs/topology/index.h"
66 #include "fflibutil.h"
67 #include "gromacs/legacyheaders/macros.h"
69 #include "gromacs/commandline/pargs.h"
70 #include "gromacs/fileio/strdb.h"
71 #include "gromacs/topology/atomprop.h"
72 #include "gromacs/topology/block.h"
73 #include "gromacs/topology/index.h"
74 #include "gromacs/topology/residuetypes.h"
75 #include "gromacs/topology/symtab.h"
76 #include "gromacs/utility/smalloc.h"
77 #include "gromacs/utility/fatalerror.h"
92 static const char *res2bb_notermini(const char *name,
93 int nrr, const rtprename_t *rr)
95 /* NOTE: This function returns the main building block name,
96 * it does not take terminal renaming into account.
101 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
106 return (i < nrr ? rr[i].main : name);
109 static const char *select_res(int nr, int resnr,
110 const char *name[], const char *expl[],
112 int nrr, const rtprename_t *rr)
116 printf("Which %s type do you want for residue %d\n", title, resnr+1);
117 for (sel = 0; (sel < nr); sel++)
119 printf("%d. %s (%s)\n",
120 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
122 printf("\nType a number:"); fflush(stdout);
124 if (scanf("%d", &sel) != 1)
126 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
132 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
137 const char *lh[easpNR] = { "ASP", "ASPH" };
138 const char *expl[easpNR] = {
139 "Not protonated (charge -1)",
140 "Protonated (charge 0)"
143 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
146 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
151 const char *lh[egluNR] = { "GLU", "GLUH" };
152 const char *expl[egluNR] = {
153 "Not protonated (charge -1)",
154 "Protonated (charge 0)"
157 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
160 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
165 const char *lh[eglnNR] = { "GLN", "QLN" };
166 const char *expl[eglnNR] = {
167 "Not protonated (charge 0)",
168 "Protonated (charge +1)"
171 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
174 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
179 const char *lh[elysNR] = { "LYSN", "LYS" };
180 const char *expl[elysNR] = {
181 "Not protonated (charge 0)",
182 "Protonated (charge +1)"
185 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
188 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
193 const char *lh[eargNR] = { "ARGN", "ARG" };
194 const char *expl[eargNR] = {
195 "Not protonated (charge 0)",
196 "Protonated (charge +1)"
199 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
202 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
204 const char *expl[ehisNR] = {
211 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
214 static void read_rtprename(const char *fname, FILE *fp,
215 int *nrtprename, rtprename_t **rtprename)
217 char line[STRLEN], buf[STRLEN];
226 while (get_a_line(fp, line, STRLEN))
229 nc = sscanf(line, "%s %s %s %s %s %s",
230 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
233 if (nc != 2 && nc != 5)
235 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
241 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
246 /* This file does not have special termini names, copy them from main */
247 strcpy(rr[n].nter, rr[n].main);
248 strcpy(rr[n].cter, rr[n].main);
249 strcpy(rr[n].bter, rr[n].main);
259 static char *search_resrename(int nrr, rtprename_t *rr,
261 gmx_bool bStart, gmx_bool bEnd,
262 gmx_bool bCompareFFRTPname)
270 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
271 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
276 /* If found in the database, rename this residue's rtp building block,
277 * otherwise keep the old name.
300 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
308 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
309 int nrr, rtprename_t *rr, t_symtab *symtab,
313 gmx_bool bStart, bEnd;
315 gmx_bool bFFRTPTERRNM;
317 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
319 for (r = 0; r < pdba->nres; r++)
323 for (j = 0; j < nterpairs; j++)
330 for (j = 0; j < nterpairs; j++)
338 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
340 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
342 /* This is a terminal residue, but the residue name,
343 * currently stored in .rtp, is not a standard residue name,
344 * but probably a force field specific rtp name.
345 * Check if we need to rename it because it is terminal.
347 nn = search_resrename(nrr, rr,
348 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
351 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
355 printf("Changing rtp entry of residue %d %s to '%s'\n",
356 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
358 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
363 static void pdbres_to_gmxrtp(t_atoms *pdba)
367 for (i = 0; (i < pdba->nres); i++)
369 if (pdba->resinfo[i].rtp == NULL)
371 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
376 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
377 gmx_bool bFullCompare, t_symtab *symtab)
382 for (i = 0; (i < pdba->nres); i++)
384 resnm = *pdba->resinfo[i].name;
385 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
386 (!bFullCompare && strstr(resnm, oldnm) != NULL))
388 /* Rename the residue name (not the rtp name) */
389 pdba->resinfo[i].name = put_symtab(symtab, newnm);
394 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
395 gmx_bool bFullCompare, t_symtab *symtab)
400 for (i = 0; (i < pdba->nres); i++)
402 /* We have not set the rtp name yes, use the residue name */
403 bbnm = *pdba->resinfo[i].name;
404 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
405 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
407 /* Change the rtp builing block name */
408 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
413 static void rename_bbint(t_atoms *pdba, const char *oldnm,
414 const char *gettp(int, int, const rtprename_t *),
415 gmx_bool bFullCompare,
417 int nrr, const rtprename_t *rr)
423 for (i = 0; i < pdba->nres; i++)
425 /* We have not set the rtp name yes, use the residue name */
426 bbnm = *pdba->resinfo[i].name;
427 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
428 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
430 ptr = gettp(i, nrr, rr);
431 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
436 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
442 ftp = fn2ftp(filename);
443 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
445 fprintf(stderr, "No occupancies in %s\n", filename);
449 for (i = 0; (i < atoms->nr); i++)
451 if (atoms->pdbinfo[i].occup != 1)
455 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
456 *atoms->resinfo[atoms->atom[i].resind].name,
457 atoms->resinfo[ atoms->atom[i].resind].nr,
459 atoms->pdbinfo[i].occup);
461 if (atoms->pdbinfo[i].occup == 0)
471 if (nzero == atoms->nr)
473 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
475 else if ((nzero > 0) || (nnotone > 0))
479 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
480 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
482 nzero, nnotone, atoms->nr);
486 fprintf(stderr, "All occupancies are one\n");
491 void write_posres(char *fn, t_atoms *pdba, real fc)
496 fp = gmx_fio_fopen(fn, "w");
498 "; In this topology include file, you will find position restraint\n"
499 "; entries for all the heavy atoms in your original pdb file.\n"
500 "; This means that all the protons which were added by pdb2gmx are\n"
501 "; not restrained.\n"
503 "[ position_restraints ]\n"
504 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
506 for (i = 0; (i < pdba->nr); i++)
508 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
510 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
516 static int read_pdball(const char *inf, const char *outf, char *title,
517 t_atoms *atoms, rvec **x,
518 int *ePBC, matrix box, gmx_bool bRemoveH,
519 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
520 gmx_atomprop_t aps, gmx_bool bVerbose)
521 /* Read a pdb file. (containing proteins) */
523 int natom, new_natom, i;
526 printf("Reading %s...\n", inf);
527 get_stx_coordnum(inf, &natom);
528 init_t_atoms(atoms, natom, TRUE);
530 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
531 if (fn2ftp(inf) == efPDB)
533 get_pdb_atomnumber(atoms, aps);
538 for (i = 0; i < atoms->nr; i++)
540 if (!is_hydrogen(*atoms->atomname[i]))
542 atoms->atom[new_natom] = atoms->atom[i];
543 atoms->atomname[new_natom] = atoms->atomname[i];
544 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
545 copy_rvec((*x)[i], (*x)[new_natom]);
549 atoms->nr = new_natom;
554 if (title && title[0])
556 printf(" '%s',", title);
558 printf(" %d atoms\n", natom);
560 /* Rename residues */
561 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
562 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
563 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
565 rename_atoms("xlateat.dat", NULL,
566 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
575 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
581 void process_chain(t_atoms *pdba, rvec *x,
582 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
583 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
584 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
585 real angle, real distance, t_symtab *symtab,
586 int nrr, const rtprename_t *rr)
588 /* Rename aromatics, lys, asp and histidine */
591 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
595 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
599 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
603 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
607 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
611 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
615 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
619 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
623 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
627 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
632 set_histp(pdba, x, angle, distance);
636 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
639 /* Initialize the rtp builing block names with the residue names
640 * for the residues that have not been processed above.
642 pdbres_to_gmxrtp(pdba);
644 /* Now we have all rtp names set.
645 * The rtp names will conform to Gromacs naming,
646 * unless the input pdb file contained one or more force field specific
647 * rtp names as residue names.
651 /* struct for sorting the atoms from the pdb file */
653 int resnr; /* residue number */
654 int j; /* database order index */
655 int index; /* original atom number */
656 char anm1; /* second letter of atom name */
657 char altloc; /* alternate location indicator */
660 int pdbicomp(const void *a, const void *b)
665 pa = (t_pdbindex *)a;
666 pb = (t_pdbindex *)b;
668 d = (pa->resnr - pb->resnr);
674 d = (pa->anm1 - pb->anm1);
677 d = (pa->altloc - pb->altloc);
685 static void sort_pdbatoms(t_restp restp[],
686 int natoms, t_atoms **pdbaptr, rvec **x,
687 t_blocka *block, char ***gnames)
689 t_atoms *pdba, *pdbnew;
704 for (i = 0; i < natoms; i++)
706 atomnm = *pdba->atomname[i];
707 rptr = &restp[pdba->atom[i].resind];
708 for (j = 0; (j < rptr->natom); j++)
710 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
715 if (j == rptr->natom)
720 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
721 "while sorting atoms.\n%s", atomnm,
722 *pdba->resinfo[pdba->atom[i].resind].name,
723 pdba->resinfo[pdba->atom[i].resind].nr,
726 is_hydrogen(atomnm) ?
727 "\nFor a hydrogen, this can be a different protonation state, or it\n"
728 "might have had a different number in the PDB file and was rebuilt\n"
729 "(it might for instance have been H3, and we only expected H1 & H2).\n"
730 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
731 "Remove this hydrogen or choose a different protonation state to solve it.\n"
732 "Option -ignh will ignore all hydrogens in the input." : ".");
733 gmx_fatal(FARGS, buf);
735 /* make shadow array to be sorted into indexgroup */
736 pdbi[i].resnr = pdba->atom[i].resind;
739 pdbi[i].anm1 = atomnm[1];
740 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
742 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
744 /* pdba is sorted in pdbnew using the pdbi index */
747 init_t_atoms(pdbnew, natoms, TRUE);
749 pdbnew->nr = pdba->nr;
750 pdbnew->nres = pdba->nres;
751 sfree(pdbnew->resinfo);
752 pdbnew->resinfo = pdba->resinfo;
753 for (i = 0; i < natoms; i++)
755 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
756 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
757 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
758 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
759 /* make indexgroup in block */
760 a[i] = pdbi[i].index;
763 sfree(pdba->atomname);
765 sfree(pdba->pdbinfo);
768 /* copy the sorted pdbnew back to pdba */
771 add_grp(block, gnames, natoms, a, "prot_sort");
777 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
779 int i, j, oldnatoms, ndel;
782 printf("Checking for duplicate atoms....\n");
783 oldnatoms = pdba->nr;
785 /* NOTE: pdba->nr is modified inside the loop */
786 for (i = 1; (i < pdba->nr); i++)
788 /* compare 'i' and 'i-1', throw away 'i' if they are identical
789 this is a 'while' because multiple alternate locations can be present */
790 while ( (i < pdba->nr) &&
791 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
792 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
797 ri = &pdba->resinfo[pdba->atom[i].resind];
798 printf("deleting duplicate atom %4s %s%4d%c",
799 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
800 if (ri->chainid && (ri->chainid != ' '))
802 printf(" ch %c", ri->chainid);
806 if (pdba->pdbinfo[i].atomnr)
808 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
810 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
812 printf(" altloc %c", pdba->pdbinfo[i].altloc);
818 /* We can not free, since it might be in the symtab */
819 /* sfree(pdba->atomname[i]); */
820 for (j = i; j < pdba->nr; j++)
822 pdba->atom[j] = pdba->atom[j+1];
823 pdba->atomname[j] = pdba->atomname[j+1];
824 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
825 copy_rvec(x[j+1], x[j]);
827 srenew(pdba->atom, pdba->nr);
828 /* srenew(pdba->atomname, pdba->nr); */
829 srenew(pdba->pdbinfo, pdba->nr);
832 if (pdba->nr != oldnatoms)
834 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
840 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
841 gmx_residuetype_t *rt)
844 const char *p_startrestype;
845 const char *p_restype;
846 int nstartwarn, nendwarn;
854 /* Find the starting terminus (typially N or 5') */
855 for (i = r0; i < r1 && *r_start == -1; i++)
857 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
858 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
860 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
867 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
871 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
879 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
880 for (i = *r_start; i < r1; i++)
882 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
883 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
891 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
892 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
893 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
897 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
906 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
912 modify_chain_numbers(t_atoms * pdba,
913 const char * chainsep)
916 char old_prev_chainid;
917 char old_this_chainid;
918 int old_prev_chainnum;
919 int old_this_chainnum;
925 const char * prev_atomname;
926 const char * this_atomname;
927 const char * prev_resname;
928 const char * this_resname;
933 int prev_chainnumber;
934 int this_chainnumber;
946 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
948 /* Be a bit flexible to catch typos */
949 if (!strncmp(chainsep, "id_o", 4))
951 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
952 splitting = SPLIT_ID_OR_TER;
953 printf("Splitting chemical chains based on TER records or chain id changing.\n");
955 else if (!strncmp(chainsep, "int", 3))
957 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
958 splitting = SPLIT_INTERACTIVE;
959 printf("Splitting chemical chains interactively.\n");
961 else if (!strncmp(chainsep, "id_a", 4))
963 splitting = SPLIT_ID_AND_TER;
964 printf("Splitting chemical chains based on TER records and chain id changing.\n");
966 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
968 splitting = SPLIT_ID_ONLY;
969 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
971 else if (chainsep[0] == 't')
973 splitting = SPLIT_TER_ONLY;
974 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
978 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
981 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
983 old_prev_chainid = '?';
984 old_prev_chainnum = -1;
987 this_atomname = NULL;
992 this_chainnumber = -1;
994 for (i = 0; i < pdba->nres; i++)
996 ri = &pdba->resinfo[i];
997 old_this_chainid = ri->chainid;
998 old_this_chainnum = ri->chainnum;
1000 prev_atomname = this_atomname;
1001 prev_atomnum = this_atomnum;
1002 prev_resname = this_resname;
1003 prev_resnum = this_resnum;
1004 prev_chainid = this_chainid;
1005 prev_chainnumber = this_chainnumber;
1007 this_atomname = *(pdba->atomname[i]);
1008 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1009 this_resname = *ri->name;
1010 this_resnum = ri->nr;
1011 this_chainid = ri->chainid;
1012 this_chainnumber = ri->chainnum;
1016 case SPLIT_ID_OR_TER:
1017 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1023 case SPLIT_ID_AND_TER:
1024 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1031 if (old_this_chainid != old_prev_chainid)
1037 case SPLIT_TER_ONLY:
1038 if (old_this_chainnum != old_prev_chainnum)
1043 case SPLIT_INTERACTIVE:
1044 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1048 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1049 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1050 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1051 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1053 if (NULL == fgets(select, STRLEN-1, stdin))
1055 gmx_fatal(FARGS, "Error reading from stdin");
1058 if (i == 0 || select[0] == 'y')
1065 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1068 old_prev_chainid = old_this_chainid;
1069 old_prev_chainnum = old_this_chainnum;
1071 ri->chainnum = new_chainnum;
1100 int gmx_pdb2gmx(int argc, char *argv[])
1102 const char *desc[] = {
1103 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1104 "some database files, adds hydrogens to the molecules and generates",
1105 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1106 "and a topology in GROMACS format.",
1107 "These files can subsequently be processed to generate a run input file.",
1109 "[THISMODULE] will search for force fields by looking for",
1110 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1111 "of the current working directory and of the GROMACS library directory",
1112 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1114 "By default the forcefield selection is interactive,",
1115 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1116 "in the list on the command line instead. In that case [THISMODULE] just looks",
1117 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1119 "After choosing a force field, all files will be read only from",
1120 "the corresponding force field directory.",
1121 "If you want to modify or add a residue types, you can copy the force",
1122 "field directory from the GROMACS library directory to your current",
1123 "working directory. If you want to add new protein residue types,",
1124 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1125 "or copy the whole library directory to a local directory and set",
1126 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1127 "Check Chapter 5 of the manual for more information about file formats.",
1130 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1131 "need not necessarily contain a protein structure. Every kind of",
1132 "molecule for which there is support in the database can be converted.",
1133 "If there is no support in the database, you can add it yourself.[PAR]",
1135 "The program has limited intelligence, it reads a number of database",
1136 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1137 "if necessary this can be done manually. The program can prompt the",
1138 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1139 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1140 "protonated (three protons, default), for Asp and Glu unprotonated",
1141 "(default) or protonated, for His the proton can be either on ND1,",
1142 "on NE2 or on both. By default these selections are done automatically.",
1143 "For His, this is based on an optimal hydrogen bonding",
1144 "conformation. Hydrogen bonds are defined based on a simple geometric",
1145 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1146 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1147 "[TT]-dist[tt] respectively.[PAR]",
1149 "The protonation state of N- and C-termini can be chosen interactively",
1150 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1151 "respectively. Some force fields support zwitterionic forms for chains of",
1152 "one residue, but for polypeptides these options should NOT be selected.",
1153 "The AMBER force fields have unique forms for the terminal residues,",
1154 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1155 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1156 "respectively to use these forms, making sure you preserve the format",
1157 "of the coordinate file. Alternatively, use named terminating residues",
1158 "(e.g. ACE, NME).[PAR]",
1160 "The separation of chains is not entirely trivial since the markup",
1161 "in user-generated PDB files frequently varies and sometimes it",
1162 "is desirable to merge entries across a TER record, for instance",
1163 "if you want a disulfide bridge or distance restraints between",
1164 "two protein chains or if you have a HEME group bound to a protein.",
1165 "In such cases multiple chains should be contained in a single",
1166 "[TT]moleculetype[tt] definition.",
1167 "To handle this, [THISMODULE] uses two separate options.",
1168 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1169 "start, and termini added when applicable. This can be done based on the",
1170 "existence of TER records, when the chain id changes, or combinations of either",
1171 "or both of these. You can also do the selection fully interactively.",
1172 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1173 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1174 "This can be turned off (no merging), all non-water chains can be merged into a",
1175 "single molecule, or the selection can be done interactively.[PAR]",
1177 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1178 "If any of the occupancies are not one, indicating that the atom is",
1179 "not resolved well in the structure, a warning message is issued.",
1180 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1181 "all occupancy fields may be zero. Either way, it is up to the user",
1182 "to verify the correctness of the input data (read the article!).[PAR]",
1184 "During processing the atoms will be reordered according to GROMACS",
1185 "conventions. With [TT]-n[tt] an index file can be generated that",
1186 "contains one group reordered in the same way. This allows you to",
1187 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1188 "one limitation: reordering is done after the hydrogens are stripped",
1189 "from the input and before new hydrogens are added. This means that",
1190 "you should not use [TT]-ignh[tt].[PAR]",
1192 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1193 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1194 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1197 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1198 "motions. Angular and out-of-plane motions can be removed by changing",
1199 "hydrogens into virtual sites and fixing angles, which fixes their",
1200 "position relative to neighboring atoms. Additionally, all atoms in the",
1201 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1202 "can be converted into virtual sites, eliminating the fast improper dihedral",
1203 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1204 "atoms are also converted to virtual sites. The mass of all atoms that are",
1205 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1206 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1207 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1208 "done for water hydrogens to slow down the rotational motion of water.",
1209 "The increase in mass of the hydrogens is subtracted from the bonded",
1210 "(heavy) atom so that the total mass of the system remains the same."
1214 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1216 t_atoms pdba_all, *pdba;
1220 int chain, nch, maxch, nwaterchain;
1222 t_chain *chains, *cc;
1223 char select[STRLEN];
1231 int i, j, k, l, nrtp;
1232 int *swap_index, si;
1236 gpp_atomtype_t atype;
1237 gmx_residuetype_t*rt;
1239 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1240 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1241 char *c, forcefield[STRLEN], ffdir[STRLEN];
1242 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1250 int nrtprename, naa;
1251 rtprename_t *rtprename = NULL;
1252 int nah, nNtdb, nCtdb, ntdblist;
1253 t_hackblock *ntdb, *ctdb, **tdblist;
1257 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1259 t_hackblock *hb_chain;
1260 t_restp *restp_chain;
1262 const char *p_restype;
1266 const char * prev_atomname;
1267 const char * this_atomname;
1268 const char * prev_resname;
1269 const char * this_resname;
1274 int prev_chainnumber;
1275 int this_chainnumber;
1277 int this_chainstart;
1278 int prev_chainstart;
1285 { efSTX, "-f", "eiwit.pdb", ffREAD },
1286 { efSTO, "-o", "conf", ffWRITE },
1287 { efTOP, NULL, NULL, ffWRITE },
1288 { efITP, "-i", "posre", ffWRITE },
1289 { efNDX, "-n", "clean", ffOPTWR },
1290 { efSTO, "-q", "clean.pdb", ffOPTWR }
1292 #define NFILE asize(fnm)
1295 /* Command line arguments must be static */
1296 static gmx_bool bNewRTP = FALSE;
1297 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1298 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1299 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1300 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1301 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1302 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1303 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1304 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1305 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1306 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1307 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1308 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1309 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1310 static const char *ff = "select";
1313 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1314 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1315 { "-lb", FALSE, etREAL, {&long_bond_dist},
1316 "HIDDENLong bond warning distance" },
1317 { "-sb", FALSE, etREAL, {&short_bond_dist},
1318 "HIDDENShort bond warning distance" },
1319 { "-chainsep", FALSE, etENUM, {chainsep},
1320 "Condition in PDB files when a new chain should be started (adding termini)" },
1321 { "-merge", FALSE, etENUM, {&merge},
1322 "Merge multiple chains into a single [moleculetype]" },
1323 { "-ff", FALSE, etSTR, {&ff},
1324 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1325 { "-water", FALSE, etENUM, {watstr},
1326 "Water model to use" },
1327 { "-inter", FALSE, etBOOL, {&bInter},
1328 "Set the next 8 options to interactive"},
1329 { "-ss", FALSE, etBOOL, {&bCysMan},
1330 "Interactive SS bridge selection" },
1331 { "-ter", FALSE, etBOOL, {&bTerMan},
1332 "Interactive termini selection, instead of charged (default)" },
1333 { "-lys", FALSE, etBOOL, {&bLysMan},
1334 "Interactive lysine selection, instead of charged" },
1335 { "-arg", FALSE, etBOOL, {&bArgMan},
1336 "Interactive arginine selection, instead of charged" },
1337 { "-asp", FALSE, etBOOL, {&bAspMan},
1338 "Interactive aspartic acid selection, instead of charged" },
1339 { "-glu", FALSE, etBOOL, {&bGluMan},
1340 "Interactive glutamic acid selection, instead of charged" },
1341 { "-gln", FALSE, etBOOL, {&bGlnMan},
1342 "Interactive glutamine selection, instead of neutral" },
1343 { "-his", FALSE, etBOOL, {&bHisMan},
1344 "Interactive histidine selection, instead of checking H-bonds" },
1345 { "-angle", FALSE, etREAL, {&angle},
1346 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1347 { "-dist", FALSE, etREAL, {&distance},
1348 "Maximum donor-acceptor distance for a H-bond (nm)" },
1349 { "-una", FALSE, etBOOL, {&bUnA},
1350 "Select aromatic rings with united CH atoms on phenylalanine, "
1351 "tryptophane and tyrosine" },
1352 { "-sort", FALSE, etBOOL, {&bSort},
1353 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1354 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1355 "Ignore hydrogen atoms that are in the coordinate file" },
1356 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1357 "Continue when atoms are missing, dangerous" },
1358 { "-v", FALSE, etBOOL, {&bVerbose},
1359 "Be slightly more verbose in messages" },
1360 { "-posrefc", FALSE, etREAL, {&posre_fc},
1361 "Force constant for position restraints" },
1362 { "-vsite", FALSE, etENUM, {vsitestr},
1363 "Convert atoms to virtual sites" },
1364 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1365 "Make hydrogen atoms heavy" },
1366 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1367 "Change the mass of hydrogens to 2 amu" },
1368 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1369 "Use charge groups in the [TT].rtp[tt] file" },
1370 { "-cmap", TRUE, etBOOL, {&bCmap},
1371 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1372 { "-renum", TRUE, etBOOL, {&bRenumRes},
1373 "Renumber the residues consecutively in the output" },
1374 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1375 "Use [TT].rtp[tt] entry names as residue names" }
1377 #define NPARGS asize(pa)
1379 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1385 /* Force field selection, interactive or direct */
1386 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1387 forcefield, sizeof(forcefield),
1388 ffdir, sizeof(ffdir));
1390 if (strlen(forcefield) > 0)
1392 strcpy(ffname, forcefield);
1393 ffname[0] = toupper(ffname[0]);
1397 gmx_fatal(FARGS, "Empty forcefield string");
1400 printf("\nUsing the %s force field in directory %s\n\n",
1403 choose_watermodel(watstr[0], ffdir, &watermodel);
1407 /* if anything changes here, also change description of -inter */
1422 else if (bDeuterate)
1431 switch (vsitestr[0][0])
1433 case 'n': /* none */
1435 bVsiteAromatics = FALSE;
1437 case 'h': /* hydrogens */
1439 bVsiteAromatics = FALSE;
1441 case 'a': /* aromatics */
1443 bVsiteAromatics = TRUE;
1446 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1447 __FILE__, __LINE__, vsitestr[0]);
1450 /* Open the symbol table */
1451 open_symtab(&symtab);
1453 /* Residue type database */
1454 gmx_residuetype_init(&rt);
1456 /* Read residue renaming database(s), if present */
1457 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1461 for (i = 0; i < nrrn; i++)
1463 fp = fflib_open(rrn[i]);
1464 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1470 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1472 for (i = 0; i < nrtprename; i++)
1474 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1476 /* Only add names if the 'standard' gromacs/iupac base name was found */
1479 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1480 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1481 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1482 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1487 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1488 strstr(watermodel, "4P")))
1492 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1493 strstr(watermodel, "5P")))
1502 aps = gmx_atomprop_init();
1503 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1504 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1509 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1512 printf("Analyzing pdb file\n");
1517 modify_chain_numbers(&pdba_all, chainsep[0]);
1521 this_atomname = NULL;
1523 this_resname = NULL;
1526 this_chainnumber = -1;
1527 this_chainstart = 0;
1528 /* Keep the compiler happy */
1529 prev_chainstart = 0;
1534 for (i = 0; (i < natom); i++)
1536 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1538 prev_atomname = this_atomname;
1539 prev_atomnum = this_atomnum;
1540 prev_resname = this_resname;
1541 prev_resnum = this_resnum;
1542 prev_chainid = this_chainid;
1543 prev_chainnumber = this_chainnumber;
1546 prev_chainstart = this_chainstart;
1549 this_atomname = *pdba_all.atomname[i];
1550 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1551 this_resname = *ri->name;
1552 this_resnum = ri->nr;
1553 this_chainid = ri->chainid;
1554 this_chainnumber = ri->chainnum;
1556 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1557 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1559 this_chainstart = pdba_all.atom[i].resind;
1564 if (!strncmp(merge[0], "int", 3))
1566 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1567 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1568 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1569 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1571 if (NULL == fgets(select, STRLEN-1, stdin))
1573 gmx_fatal(FARGS, "Error reading from stdin");
1575 bMerged = (select[0] == 'y');
1577 else if (!strncmp(merge[0], "all", 3))
1585 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1586 pdba_all.atom[i].resind - prev_chainstart;
1587 pdb_ch[nch-1].nterpairs++;
1588 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1593 /* set natom for previous chain */
1596 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1603 /* check if chain identifier was used before */
1604 for (j = 0; (j < nch); j++)
1606 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1608 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1609 "They will be treated as separate chains unless you reorder your file.\n",
1616 srenew(pdb_ch, maxch);
1618 pdb_ch[nch].chainid = ri->chainid;
1619 pdb_ch[nch].chainnum = ri->chainnum;
1620 pdb_ch[nch].start = i;
1621 pdb_ch[nch].bAllWat = bWat;
1624 pdb_ch[nch].nterpairs = 0;
1628 pdb_ch[nch].nterpairs = 1;
1630 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1631 /* modified [nch] to [0] below */
1632 pdb_ch[nch].chainstart[0] = 0;
1638 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1640 /* set all the water blocks at the end of the chain */
1641 snew(swap_index, nch);
1643 for (i = 0; i < nch; i++)
1645 if (!pdb_ch[i].bAllWat)
1651 for (i = 0; i < nch; i++)
1653 if (pdb_ch[i].bAllWat)
1659 if (nwaterchain > 1)
1661 printf("Moved all the water blocks to the end\n");
1665 /* copy pdb data and x for all chains */
1666 for (i = 0; (i < nch); i++)
1669 chains[i].chainid = pdb_ch[si].chainid;
1670 chains[i].chainnum = pdb_ch[si].chainnum;
1671 chains[i].bAllWat = pdb_ch[si].bAllWat;
1672 chains[i].nterpairs = pdb_ch[si].nterpairs;
1673 chains[i].chainstart = pdb_ch[si].chainstart;
1674 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1675 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1676 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1677 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1679 snew(chains[i].pdba, 1);
1680 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1681 snew(chains[i].x, chains[i].pdba->nr);
1682 for (j = 0; j < chains[i].pdba->nr; j++)
1684 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1685 snew(chains[i].pdba->atomname[j], 1);
1686 *chains[i].pdba->atomname[j] =
1687 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1688 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1689 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1691 /* Re-index the residues assuming that the indices are continuous */
1692 k = chains[i].pdba->atom[0].resind;
1693 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1694 chains[i].pdba->nres = nres;
1695 for (j = 0; j < chains[i].pdba->nr; j++)
1697 chains[i].pdba->atom[j].resind -= k;
1699 srenew(chains[i].pdba->resinfo, nres);
1700 for (j = 0; j < nres; j++)
1702 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1703 snew(chains[i].pdba->resinfo[j].name, 1);
1704 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1705 /* make all chain identifiers equal to that of the chain */
1706 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1710 if (nchainmerges > 0)
1712 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1716 printf("There are %d chains and %d blocks of water and "
1717 "%d residues with %d atoms\n",
1718 nch-nwaterchain, nwaterchain,
1719 pdba_all.nres, natom);
1721 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1722 for (i = 0; (i < nch); i++)
1724 printf(" %d '%c' %5d %6d %s\n",
1725 i+1, chains[i].chainid ? chains[i].chainid : '-',
1726 chains[i].pdba->nres, chains[i].pdba->nr,
1727 chains[i].bAllWat ? "(only water)" : "");
1731 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1733 /* Read atomtypes... */
1734 atype = read_atype(ffdir, &symtab);
1736 /* read residue database */
1737 printf("Reading residue database... (%s)\n", forcefield);
1738 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1741 for (i = 0; i < nrtpf; i++)
1743 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1749 /* Not correct with multiple rtp input files with different bonded types */
1750 fp = gmx_fio_fopen("new.rtp", "w");
1751 print_resall(fp, nrtp, restp, atype);
1755 /* read hydrogen database */
1756 nah = read_h_db(ffdir, &ah);
1758 /* Read Termini database... */
1759 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1760 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1762 top_fn = ftp2fn(efTOP, NFILE, fnm);
1763 top_file = gmx_fio_fopen(top_fn, "w");
1765 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1772 for (chain = 0; (chain < nch); chain++)
1774 cc = &(chains[chain]);
1776 /* set pdba, natom and nres to the current chain */
1779 natom = cc->pdba->nr;
1780 nres = cc->pdba->nres;
1782 if (cc->chainid && ( cc->chainid != ' ' ) )
1784 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1785 chain+1, cc->chainid, natom, nres);
1789 printf("Processing chain %d (%d atoms, %d residues)\n",
1790 chain+1, natom, nres);
1793 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1794 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1795 nrtprename, rtprename);
1797 cc->chainstart[cc->nterpairs] = pdba->nres;
1799 for (i = 0; i < cc->nterpairs; i++)
1801 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1802 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1804 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1810 if (cc->nterpairs == 0)
1812 printf("Problem with chain definition, or missing terminal residues.\n"
1813 "This chain does not appear to contain a recognized chain molecule.\n"
1814 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1817 /* Check for disulfides and other special bonds */
1818 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1822 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1830 sprintf(fn, "chain.pdb");
1834 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1836 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1840 for (i = 0; i < cc->nterpairs; i++)
1844 * We first apply a filter so we only have the
1845 * termini that can be applied to the residue in question
1846 * (or a generic terminus if no-residue specific is available).
1848 /* First the N terminus */
1851 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1852 *pdba->resinfo[cc->r_start[i]].name,
1853 *pdba->resinfo[cc->r_start[i]].rtp,
1857 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1858 "is already in a terminus-specific form and skipping terminus selection.\n");
1863 if (bTerMan && ntdblist > 1)
1865 sprintf(select, "Select start terminus type for %s-%d",
1866 *pdba->resinfo[cc->r_start[i]].name,
1867 pdba->resinfo[cc->r_start[i]].nr);
1868 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1872 cc->ntdb[i] = tdblist[0];
1875 printf("Start terminus %s-%d: %s\n",
1876 *pdba->resinfo[cc->r_start[i]].name,
1877 pdba->resinfo[cc->r_start[i]].nr,
1878 (cc->ntdb[i])->name);
1887 /* And the C terminus */
1890 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1891 *pdba->resinfo[cc->r_end[i]].name,
1892 *pdba->resinfo[cc->r_end[i]].rtp,
1896 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1897 "is already in a terminus-specific form and skipping terminus selection.\n");
1902 if (bTerMan && ntdblist > 1)
1904 sprintf(select, "Select end terminus type for %s-%d",
1905 *pdba->resinfo[cc->r_end[i]].name,
1906 pdba->resinfo[cc->r_end[i]].nr);
1907 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1911 cc->ctdb[i] = tdblist[0];
1913 printf("End terminus %s-%d: %s\n",
1914 *pdba->resinfo[cc->r_end[i]].name,
1915 pdba->resinfo[cc->r_end[i]].nr,
1916 (cc->ctdb[i])->name);
1925 /* lookup hackblocks and rtp for all residues */
1926 get_hackblocks_rtp(&hb_chain, &restp_chain,
1927 nrtp, restp, pdba->nres, pdba->resinfo,
1928 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1929 /* ideally, now we would not need the rtp itself anymore, but do
1930 everything using the hb and restp arrays. Unfortunately, that
1931 requires some re-thinking of code in gen_vsite.c, which I won't
1932 do now :( AF 26-7-99 */
1934 rename_atoms(NULL, ffdir,
1935 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1937 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1941 block = new_blocka();
1943 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1944 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1945 if (ftp2bSet(efNDX, NFILE, fnm))
1949 fprintf(stderr, "WARNING: with the -remh option the generated "
1950 "index file (%s) might be useless\n"
1951 "(the index file is generated before hydrogens are added)",
1952 ftp2fn(efNDX, NFILE, fnm));
1954 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1956 for (i = 0; i < block->nr; i++)
1965 fprintf(stderr, "WARNING: "
1966 "without sorting no check for duplicate atoms can be done\n");
1969 /* Generate Hydrogen atoms (and termini) in the sequence */
1970 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1971 natom = add_h(&pdba, &x, nah, ah,
1972 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1973 NULL, NULL, TRUE, FALSE);
1974 printf("Now there are %d residues with %d atoms\n",
1975 pdba->nres, pdba->nr);
1978 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1983 for (i = 0; (i < natom); i++)
1985 fprintf(debug, "Res %s%d atom %d %s\n",
1986 *(pdba->resinfo[pdba->atom[i].resind].name),
1987 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1991 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1993 /* make up molecule name(s) */
1995 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1997 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2003 sprintf(molname, "Water");
2007 this_chainid = cc->chainid;
2009 /* Add the chain id if we have one */
2010 if (this_chainid != ' ')
2012 sprintf(buf, "_chain_%c", this_chainid);
2013 strcat(suffix, buf);
2016 /* Check if there have been previous chains with the same id */
2018 for (k = 0; k < chain; k++)
2020 if (cc->chainid == chains[k].chainid)
2025 /* Add the number for this chain identifier if there are multiple copies */
2029 sprintf(buf, "%d", nid_used+1);
2030 strcat(suffix, buf);
2033 if (strlen(suffix) > 0)
2035 sprintf(molname, "%s%s", p_restype, suffix);
2039 strcpy(molname, p_restype);
2043 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2046 strcpy(itp_fn, top_fn);
2047 printf("Chain time...\n");
2048 c = strrchr(itp_fn, '.');
2049 sprintf(c, "_%s.itp", molname);
2050 c = strrchr(posre_fn, '.');
2051 sprintf(c, "_%s.itp", molname);
2052 if (strcmp(itp_fn, posre_fn) == 0)
2054 strcpy(buf_fn, posre_fn);
2055 c = strrchr(buf_fn, '.');
2057 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2061 srenew(incls, nincl);
2062 incls[nincl-1] = gmx_strdup(itp_fn);
2063 itp_file = gmx_fio_fopen(itp_fn, "w");
2070 srenew(mols, nmol+1);
2073 mols[nmol].name = gmx_strdup("SOL");
2074 mols[nmol].nr = pdba->nres;
2078 mols[nmol].name = gmx_strdup(molname);
2085 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2095 top_file2 = itp_file;
2099 top_file2 = top_file;
2102 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2104 restp_chain, hb_chain,
2106 bVsites, bVsiteAromatics, ffdir,
2107 mHmult, nssbonds, ssbonds,
2108 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2109 bRenumRes, bRTPresname);
2113 write_posres(posre_fn, pdba, posre_fc);
2118 gmx_fio_fclose(itp_file);
2121 /* pdba and natom have been reassigned somewhere so: */
2127 if (cc->chainid == ' ')
2129 sprintf(fn, "chain.pdb");
2133 sprintf(fn, "chain_%c.pdb", cc->chainid);
2135 cool_quote(quote, 255, NULL);
2136 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2140 if (watermodel == NULL)
2142 for (chain = 0; chain < nch; chain++)
2144 if (chains[chain].bAllWat)
2146 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2152 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2153 if (!fflib_fexist(buf_fn))
2155 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2156 buf_fn, watermodel);
2160 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2161 gmx_fio_fclose(top_file);
2163 gmx_residuetype_destroy(rt);
2165 /* now merge all chains back together */
2168 for (i = 0; (i < nch); i++)
2170 natom += chains[i].pdba->nr;
2171 nres += chains[i].pdba->nres;
2174 init_t_atoms(atoms, natom, FALSE);
2175 for (i = 0; i < atoms->nres; i++)
2177 sfree(atoms->resinfo[i].name);
2179 sfree(atoms->resinfo);
2181 snew(atoms->resinfo, nres);
2185 for (i = 0; (i < nch); i++)
2189 printf("Including chain %d in system: %d atoms %d residues\n",
2190 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2192 for (j = 0; (j < chains[i].pdba->nr); j++)
2194 atoms->atom[k] = chains[i].pdba->atom[j];
2195 atoms->atom[k].resind += l; /* l is processed nr of residues */
2196 atoms->atomname[k] = chains[i].pdba->atomname[j];
2197 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2198 copy_rvec(chains[i].x[j], x[k]);
2201 for (j = 0; (j < chains[i].pdba->nres); j++)
2203 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2206 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2214 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2215 print_sums(atoms, TRUE);
2218 fprintf(stderr, "\nWriting coordinate file...\n");
2219 clear_rvec(box_space);
2222 make_new_box(atoms->nr, x, box, box_space, FALSE);
2224 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2226 printf("\t\t--------- PLEASE NOTE ------------\n");
2227 printf("You have successfully generated a topology from: %s.\n",
2228 opt2fn("-f", NFILE, fnm));
2229 if (watermodel != NULL)
2231 printf("The %s force field and the %s water model are used.\n",
2232 ffname, watermodel);
2236 printf("The %s force field is used.\n",
2239 printf("\t\t--------- ETON ESAELP ------------\n");