2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/fileio/gmxfio.h"
50 #include "gromacs/utility/smalloc.h"
52 #include "gromacs/utility/cstringutil.h"
53 #include "gromacs/fileio/confio.h"
56 #include "gromacs/commandline/pargs.h"
57 #include "gromacs/fileio/futil.h"
58 #include "gmx_fatal.h"
59 #include "gromacs/fileio/pdbio.h"
68 #include "gromacs/gmxlib/conformation-utilities.h"
73 #include "fflibutil.h"
76 #include "gromacs/fileio/strdb.h"
84 char gmx[RTP_MAXCHAR+2];
85 char main[RTP_MAXCHAR+2];
86 char nter[RTP_MAXCHAR+2];
87 char cter[RTP_MAXCHAR+2];
88 char bter[RTP_MAXCHAR+2];
92 static const char *res2bb_notermini(const char *name,
93 int nrr, const rtprename_t *rr)
95 /* NOTE: This function returns the main building block name,
96 * it does not take terminal renaming into account.
101 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
106 return (i < nrr ? rr[i].main : name);
109 static const char *select_res(int nr, int resnr,
110 const char *name[], const char *expl[],
112 int nrr, const rtprename_t *rr)
116 printf("Which %s type do you want for residue %d\n", title, resnr+1);
117 for (sel = 0; (sel < nr); sel++)
119 printf("%d. %s (%s)\n",
120 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
122 printf("\nType a number:"); fflush(stdout);
124 if (scanf("%d", &sel) != 1)
126 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
132 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
137 const char *lh[easpNR] = { "ASP", "ASPH" };
138 const char *expl[easpNR] = {
139 "Not protonated (charge -1)",
140 "Protonated (charge 0)"
143 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
146 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
151 const char *lh[egluNR] = { "GLU", "GLUH" };
152 const char *expl[egluNR] = {
153 "Not protonated (charge -1)",
154 "Protonated (charge 0)"
157 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
160 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
165 const char *lh[eglnNR] = { "GLN", "QLN" };
166 const char *expl[eglnNR] = {
167 "Not protonated (charge 0)",
168 "Protonated (charge +1)"
171 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
174 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
179 const char *lh[elysNR] = { "LYSN", "LYS" };
180 const char *expl[elysNR] = {
181 "Not protonated (charge 0)",
182 "Protonated (charge +1)"
185 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
188 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
193 const char *lh[eargNR] = { "ARGN", "ARG" };
194 const char *expl[eargNR] = {
195 "Not protonated (charge 0)",
196 "Protonated (charge +1)"
199 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
202 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
204 const char *expl[ehisNR] = {
211 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
214 static void read_rtprename(const char *fname, FILE *fp,
215 int *nrtprename, rtprename_t **rtprename)
217 char line[STRLEN], buf[STRLEN];
226 while (get_a_line(fp, line, STRLEN))
229 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
230 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
231 * Note that the buffer length has been increased to 7 to allow this,
232 * so we just need to make sure the strings have zero-length initially.
235 rr[n].main[0] = '\0';
236 rr[n].nter[0] = '\0';
237 rr[n].cter[0] = '\0';
238 rr[n].bter[0] = '\0';
239 nc = sscanf(line, "%6s %6s %6s %6s %6s %s",
240 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
241 if (strlen(rr[n].gmx) > RTP_MAXCHAR || strlen(rr[n].main) > RTP_MAXCHAR ||
242 strlen(rr[n].nter) > RTP_MAXCHAR || strlen(rr[n].cter) > RTP_MAXCHAR || strlen(rr[n].bter) > RTP_MAXCHAR)
244 gmx_fatal(FARGS, "Residue renaming database '%s' has strings longer than %d chars in first 5 columns:\n%s",
245 fname, RTP_MAXCHAR, line);
250 if (nc != 2 && nc != 5)
252 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
258 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
263 /* This file does not have special termini names, copy them from main */
264 strcpy(rr[n].nter, rr[n].main);
265 strcpy(rr[n].cter, rr[n].main);
266 strcpy(rr[n].bter, rr[n].main);
276 static char *search_resrename(int nrr, rtprename_t *rr,
278 gmx_bool bStart, gmx_bool bEnd,
279 gmx_bool bCompareFFRTPname)
287 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
288 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
293 /* If found in the database, rename this residue's rtp building block,
294 * otherwise keep the old name.
317 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
325 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
326 int nrr, rtprename_t *rr, t_symtab *symtab,
330 gmx_bool bStart, bEnd;
332 gmx_bool bFFRTPTERRNM;
334 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
336 for (r = 0; r < pdba->nres; r++)
340 for (j = 0; j < nterpairs; j++)
347 for (j = 0; j < nterpairs; j++)
355 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
357 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
359 /* This is a terminal residue, but the residue name,
360 * currently stored in .rtp, is not a standard residue name,
361 * but probably a force field specific rtp name.
362 * Check if we need to rename it because it is terminal.
364 nn = search_resrename(nrr, rr,
365 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
368 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
372 printf("Changing rtp entry of residue %d %s to '%s'\n",
373 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
375 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
380 static void pdbres_to_gmxrtp(t_atoms *pdba)
384 for (i = 0; (i < pdba->nres); i++)
386 if (pdba->resinfo[i].rtp == NULL)
388 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
393 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
394 gmx_bool bFullCompare, t_symtab *symtab)
399 for (i = 0; (i < pdba->nres); i++)
401 resnm = *pdba->resinfo[i].name;
402 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
403 (!bFullCompare && strstr(resnm, oldnm) != NULL))
405 /* Rename the residue name (not the rtp name) */
406 pdba->resinfo[i].name = put_symtab(symtab, newnm);
411 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
412 gmx_bool bFullCompare, t_symtab *symtab)
417 for (i = 0; (i < pdba->nres); i++)
419 /* We have not set the rtp name yes, use the residue name */
420 bbnm = *pdba->resinfo[i].name;
421 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
422 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
424 /* Change the rtp builing block name */
425 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
430 static void rename_bbint(t_atoms *pdba, const char *oldnm,
431 const char *gettp(int, int, const rtprename_t *),
432 gmx_bool bFullCompare,
434 int nrr, const rtprename_t *rr)
440 for (i = 0; i < pdba->nres; i++)
442 /* We have not set the rtp name yes, use the residue name */
443 bbnm = *pdba->resinfo[i].name;
444 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
445 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
447 ptr = gettp(i, nrr, rr);
448 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
453 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
459 ftp = fn2ftp(filename);
460 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
462 fprintf(stderr, "No occupancies in %s\n", filename);
466 for (i = 0; (i < atoms->nr); i++)
468 if (atoms->pdbinfo[i].occup != 1)
472 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
473 *atoms->resinfo[atoms->atom[i].resind].name,
474 atoms->resinfo[ atoms->atom[i].resind].nr,
476 atoms->pdbinfo[i].occup);
478 if (atoms->pdbinfo[i].occup == 0)
488 if (nzero == atoms->nr)
490 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
492 else if ((nzero > 0) || (nnotone > 0))
496 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
497 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
499 nzero, nnotone, atoms->nr);
503 fprintf(stderr, "All occupancies are one\n");
508 void write_posres(char *fn, t_atoms *pdba, real fc)
513 fp = gmx_fio_fopen(fn, "w");
515 "; In this topology include file, you will find position restraint\n"
516 "; entries for all the heavy atoms in your original pdb file.\n"
517 "; This means that all the protons which were added by pdb2gmx are\n"
518 "; not restrained.\n"
520 "[ position_restraints ]\n"
521 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
523 for (i = 0; (i < pdba->nr); i++)
525 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
527 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
533 static int read_pdball(const char *inf, const char *outf, char *title,
534 t_atoms *atoms, rvec **x,
535 int *ePBC, matrix box, gmx_bool bRemoveH,
536 t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
537 gmx_atomprop_t aps, gmx_bool bVerbose)
538 /* Read a pdb file. (containing proteins) */
540 int natom, new_natom, i;
543 printf("Reading %s...\n", inf);
544 get_stx_coordnum(inf, &natom);
545 init_t_atoms(atoms, natom, TRUE);
547 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
548 if (fn2ftp(inf) == efPDB)
550 get_pdb_atomnumber(atoms, aps);
555 for (i = 0; i < atoms->nr; i++)
557 if (!is_hydrogen(*atoms->atomname[i]))
559 atoms->atom[new_natom] = atoms->atom[i];
560 atoms->atomname[new_natom] = atoms->atomname[i];
561 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
562 copy_rvec((*x)[i], (*x)[new_natom]);
566 atoms->nr = new_natom;
571 if (title && title[0])
573 printf(" '%s',", title);
575 printf(" %d atoms\n", natom);
577 /* Rename residues */
578 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
579 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
580 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
582 rename_atoms("xlateat.dat", NULL,
583 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
592 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
598 void process_chain(t_atoms *pdba, rvec *x,
599 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
600 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
601 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
602 real angle, real distance, t_symtab *symtab,
603 int nrr, const rtprename_t *rr)
605 /* Rename aromatics, lys, asp and histidine */
608 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
612 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
616 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
620 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
624 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
628 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
632 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
636 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
640 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
644 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
649 set_histp(pdba, x, angle, distance);
653 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
656 /* Initialize the rtp builing block names with the residue names
657 * for the residues that have not been processed above.
659 pdbres_to_gmxrtp(pdba);
661 /* Now we have all rtp names set.
662 * The rtp names will conform to Gromacs naming,
663 * unless the input pdb file contained one or more force field specific
664 * rtp names as residue names.
668 /* struct for sorting the atoms from the pdb file */
670 int resnr; /* residue number */
671 int j; /* database order index */
672 int index; /* original atom number */
673 char anm1; /* second letter of atom name */
674 char altloc; /* alternate location indicator */
677 int pdbicomp(const void *a, const void *b)
682 pa = (t_pdbindex *)a;
683 pb = (t_pdbindex *)b;
685 d = (pa->resnr - pb->resnr);
691 d = (pa->anm1 - pb->anm1);
694 d = (pa->altloc - pb->altloc);
702 static void sort_pdbatoms(t_restp restp[],
703 int natoms, t_atoms **pdbaptr, rvec **x,
704 t_blocka *block, char ***gnames)
706 t_atoms *pdba, *pdbnew;
721 for (i = 0; i < natoms; i++)
723 atomnm = *pdba->atomname[i];
724 rptr = &restp[pdba->atom[i].resind];
725 for (j = 0; (j < rptr->natom); j++)
727 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
732 if (j == rptr->natom)
737 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
738 "while sorting atoms.\n%s", atomnm,
739 *pdba->resinfo[pdba->atom[i].resind].name,
740 pdba->resinfo[pdba->atom[i].resind].nr,
743 is_hydrogen(atomnm) ?
744 "\nFor a hydrogen, this can be a different protonation state, or it\n"
745 "might have had a different number in the PDB file and was rebuilt\n"
746 "(it might for instance have been H3, and we only expected H1 & H2).\n"
747 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
748 "Remove this hydrogen or choose a different protonation state to solve it.\n"
749 "Option -ignh will ignore all hydrogens in the input." : ".");
750 gmx_fatal(FARGS, buf);
752 /* make shadow array to be sorted into indexgroup */
753 pdbi[i].resnr = pdba->atom[i].resind;
756 pdbi[i].anm1 = atomnm[1];
757 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
759 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
761 /* pdba is sorted in pdbnew using the pdbi index */
764 init_t_atoms(pdbnew, natoms, TRUE);
766 pdbnew->nr = pdba->nr;
767 pdbnew->nres = pdba->nres;
768 sfree(pdbnew->resinfo);
769 pdbnew->resinfo = pdba->resinfo;
770 for (i = 0; i < natoms; i++)
772 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
773 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
774 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
775 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
776 /* make indexgroup in block */
777 a[i] = pdbi[i].index;
780 sfree(pdba->atomname);
782 sfree(pdba->pdbinfo);
785 /* copy the sorted pdbnew back to pdba */
788 add_grp(block, gnames, natoms, a, "prot_sort");
794 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
796 int i, j, oldnatoms, ndel;
799 printf("Checking for duplicate atoms....\n");
800 oldnatoms = pdba->nr;
802 /* NOTE: pdba->nr is modified inside the loop */
803 for (i = 1; (i < pdba->nr); i++)
805 /* compare 'i' and 'i-1', throw away 'i' if they are identical
806 this is a 'while' because multiple alternate locations can be present */
807 while ( (i < pdba->nr) &&
808 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
809 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
814 ri = &pdba->resinfo[pdba->atom[i].resind];
815 printf("deleting duplicate atom %4s %s%4d%c",
816 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
817 if (ri->chainid && (ri->chainid != ' '))
819 printf(" ch %c", ri->chainid);
823 if (pdba->pdbinfo[i].atomnr)
825 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
827 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
829 printf(" altloc %c", pdba->pdbinfo[i].altloc);
835 /* We can not free, since it might be in the symtab */
836 /* sfree(pdba->atomname[i]); */
837 for (j = i; j < pdba->nr; j++)
839 pdba->atom[j] = pdba->atom[j+1];
840 pdba->atomname[j] = pdba->atomname[j+1];
841 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
842 copy_rvec(x[j+1], x[j]);
844 srenew(pdba->atom, pdba->nr);
845 /* srenew(pdba->atomname, pdba->nr); */
846 srenew(pdba->pdbinfo, pdba->nr);
849 if (pdba->nr != oldnatoms)
851 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
857 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
860 const char *p_startrestype;
861 const char *p_restype;
862 int nstartwarn, nendwarn;
870 /* Find the starting terminus (typially N or 5') */
871 for (i = r0; i < r1 && *r_start == -1; i++)
873 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
874 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
876 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
883 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
887 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
895 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
896 for (i = *r_start; i < r1; i++)
898 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
899 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
907 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
908 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
909 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
913 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
922 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
928 modify_chain_numbers(t_atoms * pdba,
929 const char * chainsep)
932 char old_prev_chainid;
933 char old_this_chainid;
934 int old_prev_chainnum;
935 int old_this_chainnum;
941 const char * prev_atomname;
942 const char * this_atomname;
943 const char * prev_resname;
944 const char * this_resname;
949 int prev_chainnumber;
950 int this_chainnumber;
962 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
964 /* Be a bit flexible to catch typos */
965 if (!strncmp(chainsep, "id_o", 4))
967 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
968 splitting = SPLIT_ID_OR_TER;
969 printf("Splitting chemical chains based on TER records or chain id changing.\n");
971 else if (!strncmp(chainsep, "int", 3))
973 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
974 splitting = SPLIT_INTERACTIVE;
975 printf("Splitting chemical chains interactively.\n");
977 else if (!strncmp(chainsep, "id_a", 4))
979 splitting = SPLIT_ID_AND_TER;
980 printf("Splitting chemical chains based on TER records and chain id changing.\n");
982 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
984 splitting = SPLIT_ID_ONLY;
985 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
987 else if (chainsep[0] == 't')
989 splitting = SPLIT_TER_ONLY;
990 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
994 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
997 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
999 old_prev_chainid = '?';
1000 old_prev_chainnum = -1;
1003 this_atomname = NULL;
1005 this_resname = NULL;
1008 this_chainnumber = -1;
1010 for (i = 0; i < pdba->nres; i++)
1012 ri = &pdba->resinfo[i];
1013 old_this_chainid = ri->chainid;
1014 old_this_chainnum = ri->chainnum;
1016 prev_atomname = this_atomname;
1017 prev_atomnum = this_atomnum;
1018 prev_resname = this_resname;
1019 prev_resnum = this_resnum;
1020 prev_chainid = this_chainid;
1021 prev_chainnumber = this_chainnumber;
1023 this_atomname = *(pdba->atomname[i]);
1024 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1025 this_resname = *ri->name;
1026 this_resnum = ri->nr;
1027 this_chainid = ri->chainid;
1028 this_chainnumber = ri->chainnum;
1032 case SPLIT_ID_OR_TER:
1033 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1039 case SPLIT_ID_AND_TER:
1040 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1047 if (old_this_chainid != old_prev_chainid)
1053 case SPLIT_TER_ONLY:
1054 if (old_this_chainnum != old_prev_chainnum)
1059 case SPLIT_INTERACTIVE:
1060 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1064 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1065 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1066 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1067 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1069 if (NULL == fgets(select, STRLEN-1, stdin))
1071 gmx_fatal(FARGS, "Error reading from stdin");
1074 if (i == 0 || select[0] == 'y')
1081 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1084 old_prev_chainid = old_this_chainid;
1085 old_prev_chainnum = old_this_chainnum;
1087 ri->chainnum = new_chainnum;
1116 int gmx_pdb2gmx(int argc, char *argv[])
1118 const char *desc[] = {
1119 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1120 "some database files, adds hydrogens to the molecules and generates",
1121 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1122 "and a topology in GROMACS format.",
1123 "These files can subsequently be processed to generate a run input file.",
1125 "[THISMODULE] will search for force fields by looking for",
1126 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1127 "of the current working directory and of the GROMACS library directory",
1128 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1130 "By default the forcefield selection is interactive,",
1131 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1132 "in the list on the command line instead. In that case [THISMODULE] just looks",
1133 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1135 "After choosing a force field, all files will be read only from",
1136 "the corresponding force field directory.",
1137 "If you want to modify or add a residue types, you can copy the force",
1138 "field directory from the GROMACS library directory to your current",
1139 "working directory. If you want to add new protein residue types,",
1140 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1141 "or copy the whole library directory to a local directory and set",
1142 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1143 "Check Chapter 5 of the manual for more information about file formats.",
1146 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1147 "need not necessarily contain a protein structure. Every kind of",
1148 "molecule for which there is support in the database can be converted.",
1149 "If there is no support in the database, you can add it yourself.[PAR]",
1151 "The program has limited intelligence, it reads a number of database",
1152 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1153 "if necessary this can be done manually. The program can prompt the",
1154 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1155 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1156 "protonated (three protons, default), for Asp and Glu unprotonated",
1157 "(default) or protonated, for His the proton can be either on ND1,",
1158 "on NE2 or on both. By default these selections are done automatically.",
1159 "For His, this is based on an optimal hydrogen bonding",
1160 "conformation. Hydrogen bonds are defined based on a simple geometric",
1161 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1162 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1163 "[TT]-dist[tt] respectively.[PAR]",
1165 "The protonation state of N- and C-termini can be chosen interactively",
1166 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1167 "respectively. Some force fields support zwitterionic forms for chains of",
1168 "one residue, but for polypeptides these options should NOT be selected.",
1169 "The AMBER force fields have unique forms for the terminal residues,",
1170 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1171 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1172 "respectively to use these forms, making sure you preserve the format",
1173 "of the coordinate file. Alternatively, use named terminating residues",
1174 "(e.g. ACE, NME).[PAR]",
1176 "The separation of chains is not entirely trivial since the markup",
1177 "in user-generated PDB files frequently varies and sometimes it",
1178 "is desirable to merge entries across a TER record, for instance",
1179 "if you want a disulfide bridge or distance restraints between",
1180 "two protein chains or if you have a HEME group bound to a protein.",
1181 "In such cases multiple chains should be contained in a single",
1182 "[TT]moleculetype[tt] definition.",
1183 "To handle this, [THISMODULE] uses two separate options.",
1184 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1185 "start, and termini added when applicable. This can be done based on the",
1186 "existence of TER records, when the chain id changes, or combinations of either",
1187 "or both of these. You can also do the selection fully interactively.",
1188 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1189 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1190 "This can be turned off (no merging), all non-water chains can be merged into a",
1191 "single molecule, or the selection can be done interactively.[PAR]",
1193 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1194 "If any of the occupancies are not one, indicating that the atom is",
1195 "not resolved well in the structure, a warning message is issued.",
1196 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1197 "all occupancy fields may be zero. Either way, it is up to the user",
1198 "to verify the correctness of the input data (read the article!).[PAR]",
1200 "During processing the atoms will be reordered according to GROMACS",
1201 "conventions. With [TT]-n[tt] an index file can be generated that",
1202 "contains one group reordered in the same way. This allows you to",
1203 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1204 "one limitation: reordering is done after the hydrogens are stripped",
1205 "from the input and before new hydrogens are added. This means that",
1206 "you should not use [TT]-ignh[tt].[PAR]",
1208 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1209 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1210 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1213 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1214 "motions. Angular and out-of-plane motions can be removed by changing",
1215 "hydrogens into virtual sites and fixing angles, which fixes their",
1216 "position relative to neighboring atoms. Additionally, all atoms in the",
1217 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1218 "can be converted into virtual sites, eliminating the fast improper dihedral",
1219 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1220 "atoms are also converted to virtual sites. The mass of all atoms that are",
1221 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1222 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1223 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1224 "done for water hydrogens to slow down the rotational motion of water.",
1225 "The increase in mass of the hydrogens is subtracted from the bonded",
1226 "(heavy) atom so that the total mass of the system remains the same."
1230 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1232 t_atoms pdba_all, *pdba;
1236 int chain, nch, maxch, nwaterchain;
1238 t_chain *chains, *cc;
1239 char select[STRLEN];
1247 int i, j, k, l, nrtp;
1248 int *swap_index, si;
1252 gpp_atomtype_t atype;
1253 gmx_residuetype_t rt;
1255 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1256 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1257 char *c, forcefield[STRLEN], ffdir[STRLEN];
1258 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1266 int nrtprename, naa;
1267 rtprename_t *rtprename = NULL;
1268 int nah, nNtdb, nCtdb, ntdblist;
1269 t_hackblock *ntdb, *ctdb, **tdblist;
1273 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1275 t_hackblock *hb_chain;
1276 t_restp *restp_chain;
1278 const char *p_restype;
1282 const char * prev_atomname;
1283 const char * this_atomname;
1284 const char * prev_resname;
1285 const char * this_resname;
1290 int prev_chainnumber;
1291 int this_chainnumber;
1293 int this_chainstart;
1294 int prev_chainstart;
1301 { efSTX, "-f", "eiwit.pdb", ffREAD },
1302 { efSTO, "-o", "conf", ffWRITE },
1303 { efTOP, NULL, NULL, ffWRITE },
1304 { efITP, "-i", "posre", ffWRITE },
1305 { efNDX, "-n", "clean", ffOPTWR },
1306 { efSTO, "-q", "clean.pdb", ffOPTWR }
1308 #define NFILE asize(fnm)
1311 /* Command line arguments must be static */
1312 static gmx_bool bNewRTP = FALSE;
1313 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1314 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1315 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1316 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1317 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1318 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1319 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1320 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1321 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1322 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1323 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1324 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1325 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1326 static const char *ff = "select";
1329 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1330 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1331 { "-lb", FALSE, etREAL, {&long_bond_dist},
1332 "HIDDENLong bond warning distance" },
1333 { "-sb", FALSE, etREAL, {&short_bond_dist},
1334 "HIDDENShort bond warning distance" },
1335 { "-chainsep", FALSE, etENUM, {chainsep},
1336 "Condition in PDB files when a new chain should be started (adding termini)" },
1337 { "-merge", FALSE, etENUM, {&merge},
1338 "Merge multiple chains into a single [moleculetype]" },
1339 { "-ff", FALSE, etSTR, {&ff},
1340 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1341 { "-water", FALSE, etENUM, {watstr},
1342 "Water model to use" },
1343 { "-inter", FALSE, etBOOL, {&bInter},
1344 "Set the next 8 options to interactive"},
1345 { "-ss", FALSE, etBOOL, {&bCysMan},
1346 "Interactive SS bridge selection" },
1347 { "-ter", FALSE, etBOOL, {&bTerMan},
1348 "Interactive termini selection, instead of charged (default)" },
1349 { "-lys", FALSE, etBOOL, {&bLysMan},
1350 "Interactive lysine selection, instead of charged" },
1351 { "-arg", FALSE, etBOOL, {&bArgMan},
1352 "Interactive arginine selection, instead of charged" },
1353 { "-asp", FALSE, etBOOL, {&bAspMan},
1354 "Interactive aspartic acid selection, instead of charged" },
1355 { "-glu", FALSE, etBOOL, {&bGluMan},
1356 "Interactive glutamic acid selection, instead of charged" },
1357 { "-gln", FALSE, etBOOL, {&bGlnMan},
1358 "Interactive glutamine selection, instead of neutral" },
1359 { "-his", FALSE, etBOOL, {&bHisMan},
1360 "Interactive histidine selection, instead of checking H-bonds" },
1361 { "-angle", FALSE, etREAL, {&angle},
1362 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1363 { "-dist", FALSE, etREAL, {&distance},
1364 "Maximum donor-acceptor distance for a H-bond (nm)" },
1365 { "-una", FALSE, etBOOL, {&bUnA},
1366 "Select aromatic rings with united CH atoms on phenylalanine, "
1367 "tryptophane and tyrosine" },
1368 { "-sort", FALSE, etBOOL, {&bSort},
1369 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1370 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1371 "Ignore hydrogen atoms that are in the coordinate file" },
1372 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1373 "Continue when atoms are missing, dangerous" },
1374 { "-v", FALSE, etBOOL, {&bVerbose},
1375 "Be slightly more verbose in messages" },
1376 { "-posrefc", FALSE, etREAL, {&posre_fc},
1377 "Force constant for position restraints" },
1378 { "-vsite", FALSE, etENUM, {vsitestr},
1379 "Convert atoms to virtual sites" },
1380 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1381 "Make hydrogen atoms heavy" },
1382 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1383 "Change the mass of hydrogens to 2 amu" },
1384 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1385 "Use charge groups in the [TT].rtp[tt] file" },
1386 { "-cmap", TRUE, etBOOL, {&bCmap},
1387 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1388 { "-renum", TRUE, etBOOL, {&bRenumRes},
1389 "Renumber the residues consecutively in the output" },
1390 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1391 "Use [TT].rtp[tt] entry names as residue names" }
1393 #define NPARGS asize(pa)
1395 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1401 /* Force field selection, interactive or direct */
1402 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1403 forcefield, sizeof(forcefield),
1404 ffdir, sizeof(ffdir));
1406 if (strlen(forcefield) > 0)
1408 strcpy(ffname, forcefield);
1409 ffname[0] = toupper(ffname[0]);
1413 gmx_fatal(FARGS, "Empty forcefield string");
1416 printf("\nUsing the %s force field in directory %s\n\n",
1419 choose_watermodel(watstr[0], ffdir, &watermodel);
1423 /* if anything changes here, also change description of -inter */
1438 else if (bDeuterate)
1447 switch (vsitestr[0][0])
1449 case 'n': /* none */
1451 bVsiteAromatics = FALSE;
1453 case 'h': /* hydrogens */
1455 bVsiteAromatics = FALSE;
1457 case 'a': /* aromatics */
1459 bVsiteAromatics = TRUE;
1462 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1463 __FILE__, __LINE__, vsitestr[0]);
1466 /* Open the symbol table */
1467 open_symtab(&symtab);
1469 /* Residue type database */
1470 gmx_residuetype_init(&rt);
1472 /* Read residue renaming database(s), if present */
1473 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1477 for (i = 0; i < nrrn; i++)
1479 fp = fflib_open(rrn[i]);
1480 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1486 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1488 for (i = 0; i < nrtprename; i++)
1490 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1492 /* Only add names if the 'standard' gromacs/iupac base name was found */
1495 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1496 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1497 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1498 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1503 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1504 strstr(watermodel, "4P")))
1508 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1509 strstr(watermodel, "5P")))
1518 aps = gmx_atomprop_init();
1519 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1520 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1525 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1528 printf("Analyzing pdb file\n");
1533 modify_chain_numbers(&pdba_all, chainsep[0]);
1537 this_atomname = NULL;
1539 this_resname = NULL;
1542 this_chainnumber = -1;
1543 this_chainstart = 0;
1544 /* Keep the compiler happy */
1545 prev_chainstart = 0;
1550 for (i = 0; (i < natom); i++)
1552 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1554 prev_atomname = this_atomname;
1555 prev_atomnum = this_atomnum;
1556 prev_resname = this_resname;
1557 prev_resnum = this_resnum;
1558 prev_chainid = this_chainid;
1559 prev_chainnumber = this_chainnumber;
1562 prev_chainstart = this_chainstart;
1565 this_atomname = *pdba_all.atomname[i];
1566 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1567 this_resname = *ri->name;
1568 this_resnum = ri->nr;
1569 this_chainid = ri->chainid;
1570 this_chainnumber = ri->chainnum;
1572 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1573 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1575 this_chainstart = pdba_all.atom[i].resind;
1580 if (!strncmp(merge[0], "int", 3))
1582 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1583 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1584 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1585 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1587 if (NULL == fgets(select, STRLEN-1, stdin))
1589 gmx_fatal(FARGS, "Error reading from stdin");
1591 bMerged = (select[0] == 'y');
1593 else if (!strncmp(merge[0], "all", 3))
1601 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1602 pdba_all.atom[i].resind - prev_chainstart;
1603 pdb_ch[nch-1].nterpairs++;
1604 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1609 /* set natom for previous chain */
1612 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1619 /* check if chain identifier was used before */
1620 for (j = 0; (j < nch); j++)
1622 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1624 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1625 "They will be treated as separate chains unless you reorder your file.\n",
1632 srenew(pdb_ch, maxch);
1634 pdb_ch[nch].chainid = ri->chainid;
1635 pdb_ch[nch].chainnum = ri->chainnum;
1636 pdb_ch[nch].start = i;
1637 pdb_ch[nch].bAllWat = bWat;
1640 pdb_ch[nch].nterpairs = 0;
1644 pdb_ch[nch].nterpairs = 1;
1646 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1647 /* modified [nch] to [0] below */
1648 pdb_ch[nch].chainstart[0] = 0;
1654 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1656 /* set all the water blocks at the end of the chain */
1657 snew(swap_index, nch);
1659 for (i = 0; i < nch; i++)
1661 if (!pdb_ch[i].bAllWat)
1667 for (i = 0; i < nch; i++)
1669 if (pdb_ch[i].bAllWat)
1675 if (nwaterchain > 1)
1677 printf("Moved all the water blocks to the end\n");
1681 /* copy pdb data and x for all chains */
1682 for (i = 0; (i < nch); i++)
1685 chains[i].chainid = pdb_ch[si].chainid;
1686 chains[i].chainnum = pdb_ch[si].chainnum;
1687 chains[i].bAllWat = pdb_ch[si].bAllWat;
1688 chains[i].nterpairs = pdb_ch[si].nterpairs;
1689 chains[i].chainstart = pdb_ch[si].chainstart;
1690 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1691 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1692 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1693 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1695 snew(chains[i].pdba, 1);
1696 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1697 snew(chains[i].x, chains[i].pdba->nr);
1698 for (j = 0; j < chains[i].pdba->nr; j++)
1700 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1701 snew(chains[i].pdba->atomname[j], 1);
1702 *chains[i].pdba->atomname[j] =
1703 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1704 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1705 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1707 /* Re-index the residues assuming that the indices are continuous */
1708 k = chains[i].pdba->atom[0].resind;
1709 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1710 chains[i].pdba->nres = nres;
1711 for (j = 0; j < chains[i].pdba->nr; j++)
1713 chains[i].pdba->atom[j].resind -= k;
1715 srenew(chains[i].pdba->resinfo, nres);
1716 for (j = 0; j < nres; j++)
1718 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1719 snew(chains[i].pdba->resinfo[j].name, 1);
1720 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1721 /* make all chain identifiers equal to that of the chain */
1722 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1726 if (nchainmerges > 0)
1728 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1732 printf("There are %d chains and %d blocks of water and "
1733 "%d residues with %d atoms\n",
1734 nch-nwaterchain, nwaterchain,
1735 pdba_all.nres, natom);
1737 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1738 for (i = 0; (i < nch); i++)
1740 printf(" %d '%c' %5d %6d %s\n",
1741 i+1, chains[i].chainid ? chains[i].chainid : '-',
1742 chains[i].pdba->nres, chains[i].pdba->nr,
1743 chains[i].bAllWat ? "(only water)" : "");
1747 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1749 /* Read atomtypes... */
1750 atype = read_atype(ffdir, &symtab);
1752 /* read residue database */
1753 printf("Reading residue database... (%s)\n", forcefield);
1754 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1757 for (i = 0; i < nrtpf; i++)
1759 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1765 /* Not correct with multiple rtp input files with different bonded types */
1766 fp = gmx_fio_fopen("new.rtp", "w");
1767 print_resall(fp, nrtp, restp, atype);
1771 /* read hydrogen database */
1772 nah = read_h_db(ffdir, &ah);
1774 /* Read Termini database... */
1775 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1776 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1778 top_fn = ftp2fn(efTOP, NFILE, fnm);
1779 top_file = gmx_fio_fopen(top_fn, "w");
1781 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1788 for (chain = 0; (chain < nch); chain++)
1790 cc = &(chains[chain]);
1792 /* set pdba, natom and nres to the current chain */
1795 natom = cc->pdba->nr;
1796 nres = cc->pdba->nres;
1798 if (cc->chainid && ( cc->chainid != ' ' ) )
1800 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1801 chain+1, cc->chainid, natom, nres);
1805 printf("Processing chain %d (%d atoms, %d residues)\n",
1806 chain+1, natom, nres);
1809 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1810 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1811 nrtprename, rtprename);
1813 cc->chainstart[cc->nterpairs] = pdba->nres;
1815 for (i = 0; i < cc->nterpairs; i++)
1817 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1818 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1820 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1826 if (cc->nterpairs == 0)
1828 printf("Problem with chain definition, or missing terminal residues.\n"
1829 "This chain does not appear to contain a recognized chain molecule.\n"
1830 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1833 /* Check for disulfides and other special bonds */
1834 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1838 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1846 sprintf(fn, "chain.pdb");
1850 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1852 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1856 for (i = 0; i < cc->nterpairs; i++)
1860 * We first apply a filter so we only have the
1861 * termini that can be applied to the residue in question
1862 * (or a generic terminus if no-residue specific is available).
1864 /* First the N terminus */
1867 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1868 *pdba->resinfo[cc->r_start[i]].name,
1869 *pdba->resinfo[cc->r_start[i]].rtp,
1873 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1874 "is already in a terminus-specific form and skipping terminus selection.\n");
1879 if (bTerMan && ntdblist > 1)
1881 sprintf(select, "Select start terminus type for %s-%d",
1882 *pdba->resinfo[cc->r_start[i]].name,
1883 pdba->resinfo[cc->r_start[i]].nr);
1884 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1888 cc->ntdb[i] = tdblist[0];
1891 printf("Start terminus %s-%d: %s\n",
1892 *pdba->resinfo[cc->r_start[i]].name,
1893 pdba->resinfo[cc->r_start[i]].nr,
1894 (cc->ntdb[i])->name);
1903 /* And the C terminus */
1906 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1907 *pdba->resinfo[cc->r_end[i]].name,
1908 *pdba->resinfo[cc->r_end[i]].rtp,
1912 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1913 "is already in a terminus-specific form and skipping terminus selection.\n");
1918 if (bTerMan && ntdblist > 1)
1920 sprintf(select, "Select end terminus type for %s-%d",
1921 *pdba->resinfo[cc->r_end[i]].name,
1922 pdba->resinfo[cc->r_end[i]].nr);
1923 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1927 cc->ctdb[i] = tdblist[0];
1929 printf("End terminus %s-%d: %s\n",
1930 *pdba->resinfo[cc->r_end[i]].name,
1931 pdba->resinfo[cc->r_end[i]].nr,
1932 (cc->ctdb[i])->name);
1941 /* lookup hackblocks and rtp for all residues */
1942 get_hackblocks_rtp(&hb_chain, &restp_chain,
1943 nrtp, restp, pdba->nres, pdba->resinfo,
1944 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1945 /* ideally, now we would not need the rtp itself anymore, but do
1946 everything using the hb and restp arrays. Unfortunately, that
1947 requires some re-thinking of code in gen_vsite.c, which I won't
1948 do now :( AF 26-7-99 */
1950 rename_atoms(NULL, ffdir,
1951 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1953 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1957 block = new_blocka();
1959 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1960 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1961 if (ftp2bSet(efNDX, NFILE, fnm))
1965 fprintf(stderr, "WARNING: with the -remh option the generated "
1966 "index file (%s) might be useless\n"
1967 "(the index file is generated before hydrogens are added)",
1968 ftp2fn(efNDX, NFILE, fnm));
1970 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1972 for (i = 0; i < block->nr; i++)
1981 fprintf(stderr, "WARNING: "
1982 "without sorting no check for duplicate atoms can be done\n");
1985 /* Generate Hydrogen atoms (and termini) in the sequence */
1986 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1987 natom = add_h(&pdba, &x, nah, ah,
1988 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1989 NULL, NULL, TRUE, FALSE);
1990 printf("Now there are %d residues with %d atoms\n",
1991 pdba->nres, pdba->nr);
1994 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1999 for (i = 0; (i < natom); i++)
2001 fprintf(debug, "Res %s%d atom %d %s\n",
2002 *(pdba->resinfo[pdba->atom[i].resind].name),
2003 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
2007 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
2009 /* make up molecule name(s) */
2011 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2013 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2019 sprintf(molname, "Water");
2023 this_chainid = cc->chainid;
2025 /* Add the chain id if we have one */
2026 if (this_chainid != ' ')
2028 sprintf(buf, "_chain_%c", this_chainid);
2029 strcat(suffix, buf);
2032 /* Check if there have been previous chains with the same id */
2034 for (k = 0; k < chain; k++)
2036 if (cc->chainid == chains[k].chainid)
2041 /* Add the number for this chain identifier if there are multiple copies */
2045 sprintf(buf, "%d", nid_used+1);
2046 strcat(suffix, buf);
2049 if (strlen(suffix) > 0)
2051 sprintf(molname, "%s%s", p_restype, suffix);
2055 strcpy(molname, p_restype);
2059 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2062 strcpy(itp_fn, top_fn);
2063 printf("Chain time...\n");
2064 c = strrchr(itp_fn, '.');
2065 sprintf(c, "_%s.itp", molname);
2066 c = strrchr(posre_fn, '.');
2067 sprintf(c, "_%s.itp", molname);
2068 if (strcmp(itp_fn, posre_fn) == 0)
2070 strcpy(buf_fn, posre_fn);
2071 c = strrchr(buf_fn, '.');
2073 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2077 srenew(incls, nincl);
2078 incls[nincl-1] = strdup(itp_fn);
2079 itp_file = gmx_fio_fopen(itp_fn, "w");
2086 srenew(mols, nmol+1);
2089 mols[nmol].name = strdup("SOL");
2090 mols[nmol].nr = pdba->nres;
2094 mols[nmol].name = strdup(molname);
2101 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2111 top_file2 = itp_file;
2115 top_file2 = top_file;
2118 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2120 restp_chain, hb_chain,
2122 bVsites, bVsiteAromatics, ffdir,
2123 mHmult, nssbonds, ssbonds,
2124 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2125 bRenumRes, bRTPresname);
2129 write_posres(posre_fn, pdba, posre_fc);
2134 gmx_fio_fclose(itp_file);
2137 /* pdba and natom have been reassigned somewhere so: */
2143 if (cc->chainid == ' ')
2145 sprintf(fn, "chain.pdb");
2149 sprintf(fn, "chain_%c.pdb", cc->chainid);
2151 cool_quote(quote, 255, NULL);
2152 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2156 if (watermodel == NULL)
2158 for (chain = 0; chain < nch; chain++)
2160 if (chains[chain].bAllWat)
2162 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2168 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2169 if (!fflib_fexist(buf_fn))
2171 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2172 buf_fn, watermodel);
2176 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2177 gmx_fio_fclose(top_file);
2179 gmx_residuetype_destroy(rt);
2181 /* now merge all chains back together */
2184 for (i = 0; (i < nch); i++)
2186 natom += chains[i].pdba->nr;
2187 nres += chains[i].pdba->nres;
2190 init_t_atoms(atoms, natom, FALSE);
2191 for (i = 0; i < atoms->nres; i++)
2193 sfree(atoms->resinfo[i].name);
2195 sfree(atoms->resinfo);
2197 snew(atoms->resinfo, nres);
2201 for (i = 0; (i < nch); i++)
2205 printf("Including chain %d in system: %d atoms %d residues\n",
2206 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2208 for (j = 0; (j < chains[i].pdba->nr); j++)
2210 atoms->atom[k] = chains[i].pdba->atom[j];
2211 atoms->atom[k].resind += l; /* l is processed nr of residues */
2212 atoms->atomname[k] = chains[i].pdba->atomname[j];
2213 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2214 copy_rvec(chains[i].x[j], x[k]);
2217 for (j = 0; (j < chains[i].pdba->nres); j++)
2219 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2222 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2230 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2231 print_sums(atoms, TRUE);
2234 fprintf(stderr, "\nWriting coordinate file...\n");
2235 clear_rvec(box_space);
2238 make_new_box(atoms->nr, x, box, box_space, FALSE);
2240 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2242 printf("\t\t--------- PLEASE NOTE ------------\n");
2243 printf("You have successfully generated a topology from: %s.\n",
2244 opt2fn("-f", NFILE, fnm));
2245 if (watermodel != NULL)
2247 printf("The %s force field and the %s water model are used.\n",
2248 ffname, watermodel);
2252 printf("The %s force field is used.\n",
2255 printf("\t\t--------- ETON ESAELP ------------\n");