2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/fileio/confio.h"
53 #include "gromacs/fileio/futil.h"
54 #include "gmx_fatal.h"
55 #include "gromacs/fileio/pdbio.h"
70 #include "fflibutil.h"
86 static const char *res2bb_notermini(const char *name,
87 int nrr, const rtprename_t *rr)
89 /* NOTE: This function returns the main building block name,
90 * it does not take terminal renaming into account.
95 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
100 return (i < nrr ? rr[i].main : name);
103 static const char *select_res(int nr, int resnr,
104 const char *name[], const char *expl[],
106 int nrr, const rtprename_t *rr)
110 printf("Which %s type do you want for residue %d\n", title, resnr+1);
111 for (sel = 0; (sel < nr); sel++)
113 printf("%d. %s (%s)\n",
114 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
116 printf("\nType a number:"); fflush(stdout);
118 if (scanf("%d", &sel) != 1)
120 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
126 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
131 const char *lh[easpNR] = { "ASP", "ASPH" };
132 const char *expl[easpNR] = {
133 "Not protonated (charge -1)",
134 "Protonated (charge 0)"
137 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
140 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
145 const char *lh[egluNR] = { "GLU", "GLUH" };
146 const char *expl[egluNR] = {
147 "Not protonated (charge -1)",
148 "Protonated (charge 0)"
151 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
154 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
159 const char *lh[eglnNR] = { "GLN", "QLN" };
160 const char *expl[eglnNR] = {
161 "Not protonated (charge 0)",
162 "Protonated (charge +1)"
165 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
168 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
173 const char *lh[elysNR] = { "LYSN", "LYS" };
174 const char *expl[elysNR] = {
175 "Not protonated (charge 0)",
176 "Protonated (charge +1)"
179 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
182 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
187 const char *lh[eargNR] = { "ARGN", "ARG" };
188 const char *expl[eargNR] = {
189 "Not protonated (charge 0)",
190 "Protonated (charge +1)"
193 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
196 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
198 const char *expl[ehisNR] = {
205 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
208 static void read_rtprename(const char *fname, FILE *fp,
209 int *nrtprename, rtprename_t **rtprename)
211 char line[STRLEN], buf[STRLEN];
220 while (get_a_line(fp, line, STRLEN))
223 nc = sscanf(line, "%s %s %s %s %s %s",
224 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
227 if (nc != 2 && nc != 5)
229 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
235 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
240 /* This file does not have special termini names, copy them from main */
241 strcpy(rr[n].nter, rr[n].main);
242 strcpy(rr[n].cter, rr[n].main);
243 strcpy(rr[n].bter, rr[n].main);
253 static char *search_resrename(int nrr, rtprename_t *rr,
255 gmx_bool bStart, gmx_bool bEnd,
256 gmx_bool bCompareFFRTPname)
264 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
265 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
270 /* If found in the database, rename this residue's rtp building block,
271 * otherwise keep the old name.
294 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
302 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
303 int nrr, rtprename_t *rr, t_symtab *symtab,
307 gmx_bool bStart, bEnd;
309 gmx_bool bFFRTPTERRNM;
311 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
313 for (r = 0; r < pdba->nres; r++)
317 for (j = 0; j < nterpairs; j++)
324 for (j = 0; j < nterpairs; j++)
332 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
334 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
336 /* This is a terminal residue, but the residue name,
337 * currently stored in .rtp, is not a standard residue name,
338 * but probably a force field specific rtp name.
339 * Check if we need to rename it because it is terminal.
341 nn = search_resrename(nrr, rr,
342 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
345 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
349 printf("Changing rtp entry of residue %d %s to '%s'\n",
350 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
352 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
357 static void pdbres_to_gmxrtp(t_atoms *pdba)
361 for (i = 0; (i < pdba->nres); i++)
363 if (pdba->resinfo[i].rtp == NULL)
365 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
370 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
371 gmx_bool bFullCompare, t_symtab *symtab)
376 for (i = 0; (i < pdba->nres); i++)
378 resnm = *pdba->resinfo[i].name;
379 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
380 (!bFullCompare && strstr(resnm, oldnm) != NULL))
382 /* Rename the residue name (not the rtp name) */
383 pdba->resinfo[i].name = put_symtab(symtab, newnm);
388 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
389 gmx_bool bFullCompare, t_symtab *symtab)
394 for (i = 0; (i < pdba->nres); i++)
396 /* We have not set the rtp name yes, use the residue name */
397 bbnm = *pdba->resinfo[i].name;
398 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
399 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
401 /* Change the rtp builing block name */
402 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
407 static void rename_bbint(t_atoms *pdba, const char *oldnm,
408 const char *gettp(int, int, const rtprename_t *),
409 gmx_bool bFullCompare,
411 int nrr, const rtprename_t *rr)
417 for (i = 0; i < pdba->nres; i++)
419 /* We have not set the rtp name yes, use the residue name */
420 bbnm = *pdba->resinfo[i].name;
421 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
422 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
424 ptr = gettp(i, nrr, rr);
425 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
430 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
436 ftp = fn2ftp(filename);
437 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
439 fprintf(stderr, "No occupancies in %s\n", filename);
443 for (i = 0; (i < atoms->nr); i++)
445 if (atoms->pdbinfo[i].occup != 1)
449 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
450 *atoms->resinfo[atoms->atom[i].resind].name,
451 atoms->resinfo[ atoms->atom[i].resind].nr,
453 atoms->pdbinfo[i].occup);
455 if (atoms->pdbinfo[i].occup == 0)
465 if (nzero == atoms->nr)
467 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
469 else if ((nzero > 0) || (nnotone > 0))
473 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
474 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
476 nzero, nnotone, atoms->nr);
480 fprintf(stderr, "All occupancies are one\n");
485 void write_posres(char *fn, t_atoms *pdba, real fc)
490 fp = gmx_fio_fopen(fn, "w");
492 "; In this topology include file, you will find position restraint\n"
493 "; entries for all the heavy atoms in your original pdb file.\n"
494 "; This means that all the protons which were added by pdb2gmx are\n"
495 "; not restrained.\n"
497 "[ position_restraints ]\n"
498 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
500 for (i = 0; (i < pdba->nr); i++)
502 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
504 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
510 static int read_pdball(const char *inf, const char *outf, char *title,
511 t_atoms *atoms, rvec **x,
512 int *ePBC, matrix box, gmx_bool bRemoveH,
513 t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
514 gmx_atomprop_t aps, gmx_bool bVerbose)
515 /* Read a pdb file. (containing proteins) */
517 int natom, new_natom, i;
520 printf("Reading %s...\n", inf);
521 get_stx_coordnum(inf, &natom);
522 init_t_atoms(atoms, natom, TRUE);
524 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
525 if (fn2ftp(inf) == efPDB)
527 get_pdb_atomnumber(atoms, aps);
532 for (i = 0; i < atoms->nr; i++)
534 if (!is_hydrogen(*atoms->atomname[i]))
536 atoms->atom[new_natom] = atoms->atom[i];
537 atoms->atomname[new_natom] = atoms->atomname[i];
538 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
539 copy_rvec((*x)[i], (*x)[new_natom]);
543 atoms->nr = new_natom;
548 if (title && title[0])
550 printf(" '%s',", title);
552 printf(" %d atoms\n", natom);
554 /* Rename residues */
555 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
556 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
557 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
559 rename_atoms("xlateat.dat", NULL,
560 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
569 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
575 void process_chain(t_atoms *pdba, rvec *x,
576 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
577 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
578 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
579 real angle, real distance, t_symtab *symtab,
580 int nrr, const rtprename_t *rr)
582 /* Rename aromatics, lys, asp and histidine */
585 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
589 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
593 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
597 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
601 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
605 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
609 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
613 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
617 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
621 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
626 set_histp(pdba, x, angle, distance);
630 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
633 /* Initialize the rtp builing block names with the residue names
634 * for the residues that have not been processed above.
636 pdbres_to_gmxrtp(pdba);
638 /* Now we have all rtp names set.
639 * The rtp names will conform to Gromacs naming,
640 * unless the input pdb file contained one or more force field specific
641 * rtp names as residue names.
645 /* struct for sorting the atoms from the pdb file */
647 int resnr; /* residue number */
648 int j; /* database order index */
649 int index; /* original atom number */
650 char anm1; /* second letter of atom name */
651 char altloc; /* alternate location indicator */
654 int pdbicomp(const void *a, const void *b)
659 pa = (t_pdbindex *)a;
660 pb = (t_pdbindex *)b;
662 d = (pa->resnr - pb->resnr);
668 d = (pa->anm1 - pb->anm1);
671 d = (pa->altloc - pb->altloc);
679 static void sort_pdbatoms(t_restp restp[],
680 int natoms, t_atoms **pdbaptr, rvec **x,
681 t_blocka *block, char ***gnames)
683 t_atoms *pdba, *pdbnew;
698 for (i = 0; i < natoms; i++)
700 atomnm = *pdba->atomname[i];
701 rptr = &restp[pdba->atom[i].resind];
702 for (j = 0; (j < rptr->natom); j++)
704 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
709 if (j == rptr->natom)
714 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
715 "while sorting atoms.\n%s", atomnm,
716 *pdba->resinfo[pdba->atom[i].resind].name,
717 pdba->resinfo[pdba->atom[i].resind].nr,
720 is_hydrogen(atomnm) ?
721 "\nFor a hydrogen, this can be a different protonation state, or it\n"
722 "might have had a different number in the PDB file and was rebuilt\n"
723 "(it might for instance have been H3, and we only expected H1 & H2).\n"
724 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
725 "Remove this hydrogen or choose a different protonation state to solve it.\n"
726 "Option -ignh will ignore all hydrogens in the input." : ".");
727 gmx_fatal(FARGS, buf);
729 /* make shadow array to be sorted into indexgroup */
730 pdbi[i].resnr = pdba->atom[i].resind;
733 pdbi[i].anm1 = atomnm[1];
734 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
736 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
738 /* pdba is sorted in pdbnew using the pdbi index */
741 init_t_atoms(pdbnew, natoms, TRUE);
743 pdbnew->nr = pdba->nr;
744 pdbnew->nres = pdba->nres;
745 sfree(pdbnew->resinfo);
746 pdbnew->resinfo = pdba->resinfo;
747 for (i = 0; i < natoms; i++)
749 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
750 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
751 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
752 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
753 /* make indexgroup in block */
754 a[i] = pdbi[i].index;
757 sfree(pdba->atomname);
759 sfree(pdba->pdbinfo);
762 /* copy the sorted pdbnew back to pdba */
765 add_grp(block, gnames, natoms, a, "prot_sort");
771 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
773 int i, j, oldnatoms, ndel;
776 printf("Checking for duplicate atoms....\n");
777 oldnatoms = pdba->nr;
779 /* NOTE: pdba->nr is modified inside the loop */
780 for (i = 1; (i < pdba->nr); i++)
782 /* compare 'i' and 'i-1', throw away 'i' if they are identical
783 this is a 'while' because multiple alternate locations can be present */
784 while ( (i < pdba->nr) &&
785 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
786 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
791 ri = &pdba->resinfo[pdba->atom[i].resind];
792 printf("deleting duplicate atom %4s %s%4d%c",
793 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
794 if (ri->chainid && (ri->chainid != ' '))
796 printf(" ch %c", ri->chainid);
800 if (pdba->pdbinfo[i].atomnr)
802 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
804 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
806 printf(" altloc %c", pdba->pdbinfo[i].altloc);
812 /* We can not free, since it might be in the symtab */
813 /* sfree(pdba->atomname[i]); */
814 for (j = i; j < pdba->nr; j++)
816 pdba->atom[j] = pdba->atom[j+1];
817 pdba->atomname[j] = pdba->atomname[j+1];
818 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
819 copy_rvec(x[j+1], x[j]);
821 srenew(pdba->atom, pdba->nr);
822 /* srenew(pdba->atomname, pdba->nr); */
823 srenew(pdba->pdbinfo, pdba->nr);
826 if (pdba->nr != oldnatoms)
828 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
834 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
837 const char *p_startrestype;
838 const char *p_restype;
839 int nstartwarn, nendwarn;
847 /* Find the starting terminus (typially N or 5') */
848 for (i = r0; i < r1 && *r_start == -1; i++)
850 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
851 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
853 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
860 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
864 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
872 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
873 for (i = *r_start; i < r1; i++)
875 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
876 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
884 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
885 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
886 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
890 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
899 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
905 modify_chain_numbers(t_atoms * pdba,
906 const char * chainsep)
909 char old_prev_chainid;
910 char old_this_chainid;
911 int old_prev_chainnum;
912 int old_this_chainnum;
918 const char * prev_atomname;
919 const char * this_atomname;
920 const char * prev_resname;
921 const char * this_resname;
926 int prev_chainnumber;
927 int this_chainnumber;
939 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
941 /* Be a bit flexible to catch typos */
942 if (!strncmp(chainsep, "id_o", 4))
944 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
945 splitting = SPLIT_ID_OR_TER;
946 printf("Splitting chemical chains based on TER records or chain id changing.\n");
948 else if (!strncmp(chainsep, "int", 3))
950 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
951 splitting = SPLIT_INTERACTIVE;
952 printf("Splitting chemical chains interactively.\n");
954 else if (!strncmp(chainsep, "id_a", 4))
956 splitting = SPLIT_ID_AND_TER;
957 printf("Splitting chemical chains based on TER records and chain id changing.\n");
959 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
961 splitting = SPLIT_ID_ONLY;
962 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
964 else if (chainsep[0] == 't')
966 splitting = SPLIT_TER_ONLY;
967 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
971 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
974 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
976 old_prev_chainid = '?';
977 old_prev_chainnum = -1;
980 this_atomname = NULL;
985 this_chainnumber = -1;
987 for (i = 0; i < pdba->nres; i++)
989 ri = &pdba->resinfo[i];
990 old_this_chainid = ri->chainid;
991 old_this_chainnum = ri->chainnum;
993 prev_atomname = this_atomname;
994 prev_atomnum = this_atomnum;
995 prev_resname = this_resname;
996 prev_resnum = this_resnum;
997 prev_chainid = this_chainid;
998 prev_chainnumber = this_chainnumber;
1000 this_atomname = *(pdba->atomname[i]);
1001 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1002 this_resname = *ri->name;
1003 this_resnum = ri->nr;
1004 this_chainid = ri->chainid;
1005 this_chainnumber = ri->chainnum;
1009 case SPLIT_ID_OR_TER:
1010 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1016 case SPLIT_ID_AND_TER:
1017 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1024 if (old_this_chainid != old_prev_chainid)
1030 case SPLIT_TER_ONLY:
1031 if (old_this_chainnum != old_prev_chainnum)
1036 case SPLIT_INTERACTIVE:
1037 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1041 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1042 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1043 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1044 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1046 if (NULL == fgets(select, STRLEN-1, stdin))
1048 gmx_fatal(FARGS, "Error reading from stdin");
1051 if (i == 0 || select[0] == 'y')
1058 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1061 old_prev_chainid = old_this_chainid;
1062 old_prev_chainnum = old_this_chainnum;
1064 ri->chainnum = new_chainnum;
1093 int gmx_pdb2gmx(int argc, char *argv[])
1095 const char *desc[] = {
1096 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1097 "some database files, adds hydrogens to the molecules and generates",
1098 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1099 "and a topology in GROMACS format.",
1100 "These files can subsequently be processed to generate a run input file.",
1102 "[THISMODULE] will search for force fields by looking for",
1103 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1104 "of the current working directory and of the GROMACS library directory",
1105 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1107 "By default the forcefield selection is interactive,",
1108 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1109 "in the list on the command line instead. In that case [THISMODULE] just looks",
1110 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1112 "After choosing a force field, all files will be read only from",
1113 "the corresponding force field directory.",
1114 "If you want to modify or add a residue types, you can copy the force",
1115 "field directory from the GROMACS library directory to your current",
1116 "working directory. If you want to add new protein residue types,",
1117 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1118 "or copy the whole library directory to a local directory and set",
1119 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1120 "Check Chapter 5 of the manual for more information about file formats.",
1123 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1124 "need not necessarily contain a protein structure. Every kind of",
1125 "molecule for which there is support in the database can be converted.",
1126 "If there is no support in the database, you can add it yourself.[PAR]",
1128 "The program has limited intelligence, it reads a number of database",
1129 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1130 "if necessary this can be done manually. The program can prompt the",
1131 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1132 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1133 "protonated (three protons, default), for Asp and Glu unprotonated",
1134 "(default) or protonated, for His the proton can be either on ND1,",
1135 "on NE2 or on both. By default these selections are done automatically.",
1136 "For His, this is based on an optimal hydrogen bonding",
1137 "conformation. Hydrogen bonds are defined based on a simple geometric",
1138 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1139 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1140 "[TT]-dist[tt] respectively.[PAR]",
1142 "The protonation state of N- and C-termini can be chosen interactively",
1143 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1144 "respectively. Some force fields support zwitterionic forms for chains of",
1145 "one residue, but for polypeptides these options should NOT be selected.",
1146 "The AMBER force fields have unique forms for the terminal residues,",
1147 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1148 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1149 "respectively to use these forms, making sure you preserve the format",
1150 "of the coordinate file. Alternatively, use named terminating residues",
1151 "(e.g. ACE, NME).[PAR]",
1153 "The separation of chains is not entirely trivial since the markup",
1154 "in user-generated PDB files frequently varies and sometimes it",
1155 "is desirable to merge entries across a TER record, for instance",
1156 "if you want a disulfide bridge or distance restraints between",
1157 "two protein chains or if you have a HEME group bound to a protein.",
1158 "In such cases multiple chains should be contained in a single",
1159 "[TT]moleculetype[tt] definition.",
1160 "To handle this, [THISMODULE] uses two separate options.",
1161 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1162 "start, and termini added when applicable. This can be done based on the",
1163 "existence of TER records, when the chain id changes, or combinations of either",
1164 "or both of these. You can also do the selection fully interactively.",
1165 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1166 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1167 "This can be turned off (no merging), all non-water chains can be merged into a",
1168 "single molecule, or the selection can be done interactively.[PAR]",
1170 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1171 "If any of the occupancies are not one, indicating that the atom is",
1172 "not resolved well in the structure, a warning message is issued.",
1173 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1174 "all occupancy fields may be zero. Either way, it is up to the user",
1175 "to verify the correctness of the input data (read the article!).[PAR]",
1177 "During processing the atoms will be reordered according to GROMACS",
1178 "conventions. With [TT]-n[tt] an index file can be generated that",
1179 "contains one group reordered in the same way. This allows you to",
1180 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1181 "one limitation: reordering is done after the hydrogens are stripped",
1182 "from the input and before new hydrogens are added. This means that",
1183 "you should not use [TT]-ignh[tt].[PAR]",
1185 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1186 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1187 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1190 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1191 "motions. Angular and out-of-plane motions can be removed by changing",
1192 "hydrogens into virtual sites and fixing angles, which fixes their",
1193 "position relative to neighboring atoms. Additionally, all atoms in the",
1194 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1195 "can be converted into virtual sites, eliminating the fast improper dihedral",
1196 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1197 "atoms are also converted to virtual sites. The mass of all atoms that are",
1198 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1199 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1200 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1201 "done for water hydrogens to slow down the rotational motion of water.",
1202 "The increase in mass of the hydrogens is subtracted from the bonded",
1203 "(heavy) atom so that the total mass of the system remains the same."
1207 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1209 t_atoms pdba_all, *pdba;
1213 int chain, nch, maxch, nwaterchain;
1215 t_chain *chains, *cc;
1216 char select[STRLEN];
1224 int i, j, k, l, nrtp;
1225 int *swap_index, si;
1229 gpp_atomtype_t atype;
1230 gmx_residuetype_t rt;
1232 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1233 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1234 char *c, forcefield[STRLEN], ffdir[STRLEN];
1235 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1243 int nrtprename, naa;
1244 rtprename_t *rtprename = NULL;
1245 int nah, nNtdb, nCtdb, ntdblist;
1246 t_hackblock *ntdb, *ctdb, **tdblist;
1250 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1252 t_hackblock *hb_chain;
1253 t_restp *restp_chain;
1255 const char *p_restype;
1259 const char * prev_atomname;
1260 const char * this_atomname;
1261 const char * prev_resname;
1262 const char * this_resname;
1267 int prev_chainnumber;
1268 int this_chainnumber;
1270 int this_chainstart;
1271 int prev_chainstart;
1278 { efSTX, "-f", "eiwit.pdb", ffREAD },
1279 { efSTO, "-o", "conf", ffWRITE },
1280 { efTOP, NULL, NULL, ffWRITE },
1281 { efITP, "-i", "posre", ffWRITE },
1282 { efNDX, "-n", "clean", ffOPTWR },
1283 { efSTO, "-q", "clean.pdb", ffOPTWR }
1285 #define NFILE asize(fnm)
1288 /* Command line arguments must be static */
1289 static gmx_bool bNewRTP = FALSE;
1290 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1291 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1292 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1293 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1294 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1295 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1296 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1297 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1298 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1299 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1300 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1301 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1302 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1303 static const char *ff = "select";
1306 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1307 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1308 { "-lb", FALSE, etREAL, {&long_bond_dist},
1309 "HIDDENLong bond warning distance" },
1310 { "-sb", FALSE, etREAL, {&short_bond_dist},
1311 "HIDDENShort bond warning distance" },
1312 { "-chainsep", FALSE, etENUM, {chainsep},
1313 "Condition in PDB files when a new chain should be started (adding termini)" },
1314 { "-merge", FALSE, etENUM, {&merge},
1315 "Merge multiple chains into a single [moleculetype]" },
1316 { "-ff", FALSE, etSTR, {&ff},
1317 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1318 { "-water", FALSE, etENUM, {watstr},
1319 "Water model to use" },
1320 { "-inter", FALSE, etBOOL, {&bInter},
1321 "Set the next 8 options to interactive"},
1322 { "-ss", FALSE, etBOOL, {&bCysMan},
1323 "Interactive SS bridge selection" },
1324 { "-ter", FALSE, etBOOL, {&bTerMan},
1325 "Interactive termini selection, instead of charged (default)" },
1326 { "-lys", FALSE, etBOOL, {&bLysMan},
1327 "Interactive lysine selection, instead of charged" },
1328 { "-arg", FALSE, etBOOL, {&bArgMan},
1329 "Interactive arginine selection, instead of charged" },
1330 { "-asp", FALSE, etBOOL, {&bAspMan},
1331 "Interactive aspartic acid selection, instead of charged" },
1332 { "-glu", FALSE, etBOOL, {&bGluMan},
1333 "Interactive glutamic acid selection, instead of charged" },
1334 { "-gln", FALSE, etBOOL, {&bGlnMan},
1335 "Interactive glutamine selection, instead of neutral" },
1336 { "-his", FALSE, etBOOL, {&bHisMan},
1337 "Interactive histidine selection, instead of checking H-bonds" },
1338 { "-angle", FALSE, etREAL, {&angle},
1339 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1340 { "-dist", FALSE, etREAL, {&distance},
1341 "Maximum donor-acceptor distance for a H-bond (nm)" },
1342 { "-una", FALSE, etBOOL, {&bUnA},
1343 "Select aromatic rings with united CH atoms on phenylalanine, "
1344 "tryptophane and tyrosine" },
1345 { "-sort", FALSE, etBOOL, {&bSort},
1346 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1347 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1348 "Ignore hydrogen atoms that are in the coordinate file" },
1349 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1350 "Continue when atoms are missing, dangerous" },
1351 { "-v", FALSE, etBOOL, {&bVerbose},
1352 "Be slightly more verbose in messages" },
1353 { "-posrefc", FALSE, etREAL, {&posre_fc},
1354 "Force constant for position restraints" },
1355 { "-vsite", FALSE, etENUM, {vsitestr},
1356 "Convert atoms to virtual sites" },
1357 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1358 "Make hydrogen atoms heavy" },
1359 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1360 "Change the mass of hydrogens to 2 amu" },
1361 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1362 "Use charge groups in the [TT].rtp[tt] file" },
1363 { "-cmap", TRUE, etBOOL, {&bCmap},
1364 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1365 { "-renum", TRUE, etBOOL, {&bRenumRes},
1366 "Renumber the residues consecutively in the output" },
1367 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1368 "Use [TT].rtp[tt] entry names as residue names" }
1370 #define NPARGS asize(pa)
1372 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1378 /* Force field selection, interactive or direct */
1379 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1380 forcefield, sizeof(forcefield),
1381 ffdir, sizeof(ffdir));
1383 if (strlen(forcefield) > 0)
1385 strcpy(ffname, forcefield);
1386 ffname[0] = toupper(ffname[0]);
1390 gmx_fatal(FARGS, "Empty forcefield string");
1393 printf("\nUsing the %s force field in directory %s\n\n",
1396 choose_watermodel(watstr[0], ffdir, &watermodel);
1400 /* if anything changes here, also change description of -inter */
1415 else if (bDeuterate)
1424 switch (vsitestr[0][0])
1426 case 'n': /* none */
1428 bVsiteAromatics = FALSE;
1430 case 'h': /* hydrogens */
1432 bVsiteAromatics = FALSE;
1434 case 'a': /* aromatics */
1436 bVsiteAromatics = TRUE;
1439 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1440 __FILE__, __LINE__, vsitestr[0]);
1443 /* Open the symbol table */
1444 open_symtab(&symtab);
1446 /* Residue type database */
1447 gmx_residuetype_init(&rt);
1449 /* Read residue renaming database(s), if present */
1450 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1454 for (i = 0; i < nrrn; i++)
1456 fp = fflib_open(rrn[i]);
1457 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1463 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1465 for (i = 0; i < nrtprename; i++)
1467 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1469 /* Only add names if the 'standard' gromacs/iupac base name was found */
1472 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1473 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1474 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1475 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1480 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1481 strstr(watermodel, "4P")))
1485 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1486 strstr(watermodel, "5P")))
1495 aps = gmx_atomprop_init();
1496 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1497 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1502 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1505 printf("Analyzing pdb file\n");
1510 modify_chain_numbers(&pdba_all, chainsep[0]);
1514 this_atomname = NULL;
1516 this_resname = NULL;
1519 this_chainnumber = -1;
1520 this_chainstart = 0;
1521 /* Keep the compiler happy */
1522 prev_chainstart = 0;
1527 for (i = 0; (i < natom); i++)
1529 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1531 prev_atomname = this_atomname;
1532 prev_atomnum = this_atomnum;
1533 prev_resname = this_resname;
1534 prev_resnum = this_resnum;
1535 prev_chainid = this_chainid;
1536 prev_chainnumber = this_chainnumber;
1539 prev_chainstart = this_chainstart;
1542 this_atomname = *pdba_all.atomname[i];
1543 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1544 this_resname = *ri->name;
1545 this_resnum = ri->nr;
1546 this_chainid = ri->chainid;
1547 this_chainnumber = ri->chainnum;
1549 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1550 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1552 this_chainstart = pdba_all.atom[i].resind;
1557 if (!strncmp(merge[0], "int", 3))
1559 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1560 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1561 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1562 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1564 if (NULL == fgets(select, STRLEN-1, stdin))
1566 gmx_fatal(FARGS, "Error reading from stdin");
1568 bMerged = (select[0] == 'y');
1570 else if (!strncmp(merge[0], "all", 3))
1578 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1579 pdba_all.atom[i].resind - prev_chainstart;
1580 pdb_ch[nch-1].nterpairs++;
1581 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1586 /* set natom for previous chain */
1589 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1596 /* check if chain identifier was used before */
1597 for (j = 0; (j < nch); j++)
1599 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1601 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1602 "They will be treated as separate chains unless you reorder your file.\n",
1609 srenew(pdb_ch, maxch);
1611 pdb_ch[nch].chainid = ri->chainid;
1612 pdb_ch[nch].chainnum = ri->chainnum;
1613 pdb_ch[nch].start = i;
1614 pdb_ch[nch].bAllWat = bWat;
1617 pdb_ch[nch].nterpairs = 0;
1621 pdb_ch[nch].nterpairs = 1;
1623 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1624 /* modified [nch] to [0] below */
1625 pdb_ch[nch].chainstart[0] = 0;
1631 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1633 /* set all the water blocks at the end of the chain */
1634 snew(swap_index, nch);
1636 for (i = 0; i < nch; i++)
1638 if (!pdb_ch[i].bAllWat)
1644 for (i = 0; i < nch; i++)
1646 if (pdb_ch[i].bAllWat)
1652 if (nwaterchain > 1)
1654 printf("Moved all the water blocks to the end\n");
1658 /* copy pdb data and x for all chains */
1659 for (i = 0; (i < nch); i++)
1662 chains[i].chainid = pdb_ch[si].chainid;
1663 chains[i].chainnum = pdb_ch[si].chainnum;
1664 chains[i].bAllWat = pdb_ch[si].bAllWat;
1665 chains[i].nterpairs = pdb_ch[si].nterpairs;
1666 chains[i].chainstart = pdb_ch[si].chainstart;
1667 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1668 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1669 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1670 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1672 snew(chains[i].pdba, 1);
1673 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1674 snew(chains[i].x, chains[i].pdba->nr);
1675 for (j = 0; j < chains[i].pdba->nr; j++)
1677 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1678 snew(chains[i].pdba->atomname[j], 1);
1679 *chains[i].pdba->atomname[j] =
1680 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1681 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1682 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1684 /* Re-index the residues assuming that the indices are continuous */
1685 k = chains[i].pdba->atom[0].resind;
1686 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1687 chains[i].pdba->nres = nres;
1688 for (j = 0; j < chains[i].pdba->nr; j++)
1690 chains[i].pdba->atom[j].resind -= k;
1692 srenew(chains[i].pdba->resinfo, nres);
1693 for (j = 0; j < nres; j++)
1695 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1696 snew(chains[i].pdba->resinfo[j].name, 1);
1697 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1698 /* make all chain identifiers equal to that of the chain */
1699 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1703 if (nchainmerges > 0)
1705 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1709 printf("There are %d chains and %d blocks of water and "
1710 "%d residues with %d atoms\n",
1711 nch-nwaterchain, nwaterchain,
1712 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr, natom);
1714 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1715 for (i = 0; (i < nch); i++)
1717 printf(" %d '%c' %5d %6d %s\n",
1718 i+1, chains[i].chainid ? chains[i].chainid : '-',
1719 chains[i].pdba->nres, chains[i].pdba->nr,
1720 chains[i].bAllWat ? "(only water)" : "");
1724 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1726 /* Read atomtypes... */
1727 atype = read_atype(ffdir, &symtab);
1729 /* read residue database */
1730 printf("Reading residue database... (%s)\n", forcefield);
1731 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1734 for (i = 0; i < nrtpf; i++)
1736 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1742 /* Not correct with multiple rtp input files with different bonded types */
1743 fp = gmx_fio_fopen("new.rtp", "w");
1744 print_resall(fp, nrtp, restp, atype);
1748 /* read hydrogen database */
1749 nah = read_h_db(ffdir, &ah);
1751 /* Read Termini database... */
1752 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1753 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1755 top_fn = ftp2fn(efTOP, NFILE, fnm);
1756 top_file = gmx_fio_fopen(top_fn, "w");
1758 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1765 for (chain = 0; (chain < nch); chain++)
1767 cc = &(chains[chain]);
1769 /* set pdba, natom and nres to the current chain */
1772 natom = cc->pdba->nr;
1773 nres = cc->pdba->nres;
1775 if (cc->chainid && ( cc->chainid != ' ' ) )
1777 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1778 chain+1, cc->chainid, natom, nres);
1782 printf("Processing chain %d (%d atoms, %d residues)\n",
1783 chain+1, natom, nres);
1786 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1787 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1788 nrtprename, rtprename);
1790 cc->chainstart[cc->nterpairs] = pdba->nres;
1792 for (i = 0; i < cc->nterpairs; i++)
1794 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1795 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1797 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1803 if (cc->nterpairs == 0)
1805 printf("Problem with chain definition, or missing terminal residues.\n"
1806 "This chain does not appear to contain a recognized chain molecule.\n"
1807 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1810 /* Check for disulfides and other special bonds */
1811 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1815 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1823 sprintf(fn, "chain.pdb");
1827 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1829 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1833 for (i = 0; i < cc->nterpairs; i++)
1837 * We first apply a filter so we only have the
1838 * termini that can be applied to the residue in question
1839 * (or a generic terminus if no-residue specific is available).
1841 /* First the N terminus */
1844 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1845 *pdba->resinfo[cc->r_start[i]].name,
1846 *pdba->resinfo[cc->r_start[i]].rtp,
1850 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1851 "is already in a terminus-specific form and skipping terminus selection.\n");
1856 if (bTerMan && ntdblist > 1)
1858 sprintf(select, "Select start terminus type for %s-%d",
1859 *pdba->resinfo[cc->r_start[i]].name,
1860 pdba->resinfo[cc->r_start[i]].nr);
1861 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1865 cc->ntdb[i] = tdblist[0];
1868 printf("Start terminus %s-%d: %s\n",
1869 *pdba->resinfo[cc->r_start[i]].name,
1870 pdba->resinfo[cc->r_start[i]].nr,
1871 (cc->ntdb[i])->name);
1880 /* And the C terminus */
1883 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1884 *pdba->resinfo[cc->r_end[i]].name,
1885 *pdba->resinfo[cc->r_end[i]].rtp,
1889 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1890 "is already in a terminus-specific form and skipping terminus selection.\n");
1895 if (bTerMan && ntdblist > 1)
1897 sprintf(select, "Select end terminus type for %s-%d",
1898 *pdba->resinfo[cc->r_end[i]].name,
1899 pdba->resinfo[cc->r_end[i]].nr);
1900 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1904 cc->ctdb[i] = tdblist[0];
1906 printf("End terminus %s-%d: %s\n",
1907 *pdba->resinfo[cc->r_end[i]].name,
1908 pdba->resinfo[cc->r_end[i]].nr,
1909 (cc->ctdb[i])->name);
1918 /* lookup hackblocks and rtp for all residues */
1919 get_hackblocks_rtp(&hb_chain, &restp_chain,
1920 nrtp, restp, pdba->nres, pdba->resinfo,
1921 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1922 /* ideally, now we would not need the rtp itself anymore, but do
1923 everything using the hb and restp arrays. Unfortunately, that
1924 requires some re-thinking of code in gen_vsite.c, which I won't
1925 do now :( AF 26-7-99 */
1927 rename_atoms(NULL, ffdir,
1928 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1930 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1934 block = new_blocka();
1936 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1937 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1938 if (ftp2bSet(efNDX, NFILE, fnm))
1942 fprintf(stderr, "WARNING: with the -remh option the generated "
1943 "index file (%s) might be useless\n"
1944 "(the index file is generated before hydrogens are added)",
1945 ftp2fn(efNDX, NFILE, fnm));
1947 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames);
1949 for (i = 0; i < block->nr; i++)
1958 fprintf(stderr, "WARNING: "
1959 "without sorting no check for duplicate atoms can be done\n");
1962 /* Generate Hydrogen atoms (and termini) in the sequence */
1963 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1964 natom = add_h(&pdba, &x, nah, ah,
1965 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1966 NULL, NULL, TRUE, FALSE);
1967 printf("Now there are %d residues with %d atoms\n",
1968 pdba->nres, pdba->nr);
1971 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1976 for (i = 0; (i < natom); i++)
1978 fprintf(debug, "Res %s%d atom %d %s\n",
1979 *(pdba->resinfo[pdba->atom[i].resind].name),
1980 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1984 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1986 /* make up molecule name(s) */
1988 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1990 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
1996 sprintf(molname, "Water");
2000 this_chainid = cc->chainid;
2002 /* Add the chain id if we have one */
2003 if (this_chainid != ' ')
2005 sprintf(buf, "_chain_%c", this_chainid);
2006 strcat(suffix, buf);
2009 /* Check if there have been previous chains with the same id */
2011 for (k = 0; k < chain; k++)
2013 if (cc->chainid == chains[k].chainid)
2018 /* Add the number for this chain identifier if there are multiple copies */
2022 sprintf(buf, "%d", nid_used+1);
2023 strcat(suffix, buf);
2026 if (strlen(suffix) > 0)
2028 sprintf(molname, "%s%s", p_restype, suffix);
2032 strcpy(molname, p_restype);
2036 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2039 strcpy(itp_fn, top_fn);
2040 printf("Chain time...\n");
2041 c = strrchr(itp_fn, '.');
2042 sprintf(c, "_%s.itp", molname);
2043 c = strrchr(posre_fn, '.');
2044 sprintf(c, "_%s.itp", molname);
2045 if (strcmp(itp_fn, posre_fn) == 0)
2047 strcpy(buf_fn, posre_fn);
2048 c = strrchr(buf_fn, '.');
2050 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2054 srenew(incls, nincl);
2055 incls[nincl-1] = strdup(itp_fn);
2056 itp_file = gmx_fio_fopen(itp_fn, "w");
2063 srenew(mols, nmol+1);
2066 mols[nmol].name = strdup("SOL");
2067 mols[nmol].nr = pdba->nres;
2071 mols[nmol].name = strdup(molname);
2078 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2088 top_file2 = itp_file;
2092 top_file2 = top_file;
2095 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2097 restp_chain, hb_chain,
2099 bVsites, bVsiteAromatics, ffdir,
2100 mHmult, nssbonds, ssbonds,
2101 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2102 bRenumRes, bRTPresname);
2106 write_posres(posre_fn, pdba, posre_fc);
2111 gmx_fio_fclose(itp_file);
2114 /* pdba and natom have been reassigned somewhere so: */
2120 if (cc->chainid == ' ')
2122 sprintf(fn, "chain.pdb");
2126 sprintf(fn, "chain_%c.pdb", cc->chainid);
2128 cool_quote(quote, 255, NULL);
2129 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2133 if (watermodel == NULL)
2135 for (chain = 0; chain < nch; chain++)
2137 if (chains[chain].bAllWat)
2139 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2145 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2146 if (!fflib_fexist(buf_fn))
2148 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2149 buf_fn, watermodel);
2153 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2154 gmx_fio_fclose(top_file);
2156 gmx_residuetype_destroy(rt);
2158 /* now merge all chains back together */
2161 for (i = 0; (i < nch); i++)
2163 natom += chains[i].pdba->nr;
2164 nres += chains[i].pdba->nres;
2167 init_t_atoms(atoms, natom, FALSE);
2168 for (i = 0; i < atoms->nres; i++)
2170 sfree(atoms->resinfo[i].name);
2172 sfree(atoms->resinfo);
2174 snew(atoms->resinfo, nres);
2178 for (i = 0; (i < nch); i++)
2182 printf("Including chain %d in system: %d atoms %d residues\n",
2183 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2185 for (j = 0; (j < chains[i].pdba->nr); j++)
2187 atoms->atom[k] = chains[i].pdba->atom[j];
2188 atoms->atom[k].resind += l; /* l is processed nr of residues */
2189 atoms->atomname[k] = chains[i].pdba->atomname[j];
2190 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2191 copy_rvec(chains[i].x[j], x[k]);
2194 for (j = 0; (j < chains[i].pdba->nres); j++)
2196 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2199 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2207 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2208 print_sums(atoms, TRUE);
2211 fprintf(stderr, "\nWriting coordinate file...\n");
2212 clear_rvec(box_space);
2215 gen_box(0, atoms->nr, x, box, box_space, FALSE);
2217 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2219 printf("\t\t--------- PLEASE NOTE ------------\n");
2220 printf("You have successfully generated a topology from: %s.\n",
2221 opt2fn("-f", NFILE, fnm));
2222 if (watermodel != NULL)
2224 printf("The %s force field and the %s water model are used.\n",
2225 ffname, watermodel);
2229 printf("The %s force field is used.\n",
2232 printf("\t\t--------- ETON ESAELP ------------\n");