2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
49 #include "gromacs/fileio/gmxfio.h"
51 #include "gromacs/utility/cstringutil.h"
52 #include "gromacs/fileio/confio.h"
53 #include "gromacs/math/vec.h"
54 #include "gromacs/utility/futil.h"
55 #include "gromacs/fileio/pdbio.h"
62 #include "gromacs/gmxlib/conformation-utilities.h"
67 #include "fflibutil.h"
70 #include "gromacs/commandline/pargs.h"
71 #include "gromacs/fileio/strdb.h"
72 #include "gromacs/topology/block.h"
73 #include "gromacs/topology/symtab.h"
74 #include "gromacs/utility/smalloc.h"
75 #include "gromacs/utility/fatalerror.h"
90 static const char *res2bb_notermini(const char *name,
91 int nrr, const rtprename_t *rr)
93 /* NOTE: This function returns the main building block name,
94 * it does not take terminal renaming into account.
99 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
104 return (i < nrr ? rr[i].main : name);
107 static const char *select_res(int nr, int resnr,
108 const char *name[], const char *expl[],
110 int nrr, const rtprename_t *rr)
114 printf("Which %s type do you want for residue %d\n", title, resnr+1);
115 for (sel = 0; (sel < nr); sel++)
117 printf("%d. %s (%s)\n",
118 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
120 printf("\nType a number:"); fflush(stdout);
122 if (scanf("%d", &sel) != 1)
124 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
130 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
135 const char *lh[easpNR] = { "ASP", "ASPH" };
136 const char *expl[easpNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
144 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
149 const char *lh[egluNR] = { "GLU", "GLUH" };
150 const char *expl[egluNR] = {
151 "Not protonated (charge -1)",
152 "Protonated (charge 0)"
155 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
158 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
163 const char *lh[eglnNR] = { "GLN", "QLN" };
164 const char *expl[eglnNR] = {
165 "Not protonated (charge 0)",
166 "Protonated (charge +1)"
169 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
172 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
177 const char *lh[elysNR] = { "LYSN", "LYS" };
178 const char *expl[elysNR] = {
179 "Not protonated (charge 0)",
180 "Protonated (charge +1)"
183 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
186 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
191 const char *lh[eargNR] = { "ARGN", "ARG" };
192 const char *expl[eargNR] = {
193 "Not protonated (charge 0)",
194 "Protonated (charge +1)"
197 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
200 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
202 const char *expl[ehisNR] = {
209 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
212 static void read_rtprename(const char *fname, FILE *fp,
213 int *nrtprename, rtprename_t **rtprename)
215 char line[STRLEN], buf[STRLEN];
224 while (get_a_line(fp, line, STRLEN))
227 nc = sscanf(line, "%s %s %s %s %s %s",
228 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
231 if (nc != 2 && nc != 5)
233 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
239 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
244 /* This file does not have special termini names, copy them from main */
245 strcpy(rr[n].nter, rr[n].main);
246 strcpy(rr[n].cter, rr[n].main);
247 strcpy(rr[n].bter, rr[n].main);
257 static char *search_resrename(int nrr, rtprename_t *rr,
259 gmx_bool bStart, gmx_bool bEnd,
260 gmx_bool bCompareFFRTPname)
268 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
269 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
274 /* If found in the database, rename this residue's rtp building block,
275 * otherwise keep the old name.
298 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
306 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
307 int nrr, rtprename_t *rr, t_symtab *symtab,
311 gmx_bool bStart, bEnd;
313 gmx_bool bFFRTPTERRNM;
315 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
317 for (r = 0; r < pdba->nres; r++)
321 for (j = 0; j < nterpairs; j++)
328 for (j = 0; j < nterpairs; j++)
336 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
338 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
340 /* This is a terminal residue, but the residue name,
341 * currently stored in .rtp, is not a standard residue name,
342 * but probably a force field specific rtp name.
343 * Check if we need to rename it because it is terminal.
345 nn = search_resrename(nrr, rr,
346 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
349 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
353 printf("Changing rtp entry of residue %d %s to '%s'\n",
354 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
356 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
361 static void pdbres_to_gmxrtp(t_atoms *pdba)
365 for (i = 0; (i < pdba->nres); i++)
367 if (pdba->resinfo[i].rtp == NULL)
369 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
374 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
375 gmx_bool bFullCompare, t_symtab *symtab)
380 for (i = 0; (i < pdba->nres); i++)
382 resnm = *pdba->resinfo[i].name;
383 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
384 (!bFullCompare && strstr(resnm, oldnm) != NULL))
386 /* Rename the residue name (not the rtp name) */
387 pdba->resinfo[i].name = put_symtab(symtab, newnm);
392 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
393 gmx_bool bFullCompare, t_symtab *symtab)
398 for (i = 0; (i < pdba->nres); i++)
400 /* We have not set the rtp name yes, use the residue name */
401 bbnm = *pdba->resinfo[i].name;
402 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
403 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
405 /* Change the rtp builing block name */
406 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
411 static void rename_bbint(t_atoms *pdba, const char *oldnm,
412 const char *gettp(int, int, const rtprename_t *),
413 gmx_bool bFullCompare,
415 int nrr, const rtprename_t *rr)
421 for (i = 0; i < pdba->nres; i++)
423 /* We have not set the rtp name yes, use the residue name */
424 bbnm = *pdba->resinfo[i].name;
425 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
426 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
428 ptr = gettp(i, nrr, rr);
429 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
434 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
440 ftp = fn2ftp(filename);
441 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
443 fprintf(stderr, "No occupancies in %s\n", filename);
447 for (i = 0; (i < atoms->nr); i++)
449 if (atoms->pdbinfo[i].occup != 1)
453 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
454 *atoms->resinfo[atoms->atom[i].resind].name,
455 atoms->resinfo[ atoms->atom[i].resind].nr,
457 atoms->pdbinfo[i].occup);
459 if (atoms->pdbinfo[i].occup == 0)
469 if (nzero == atoms->nr)
471 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
473 else if ((nzero > 0) || (nnotone > 0))
477 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
478 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
480 nzero, nnotone, atoms->nr);
484 fprintf(stderr, "All occupancies are one\n");
489 void write_posres(char *fn, t_atoms *pdba, real fc)
494 fp = gmx_fio_fopen(fn, "w");
496 "; In this topology include file, you will find position restraint\n"
497 "; entries for all the heavy atoms in your original pdb file.\n"
498 "; This means that all the protons which were added by pdb2gmx are\n"
499 "; not restrained.\n"
501 "[ position_restraints ]\n"
502 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
504 for (i = 0; (i < pdba->nr); i++)
506 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
508 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
514 static int read_pdball(const char *inf, const char *outf, char *title,
515 t_atoms *atoms, rvec **x,
516 int *ePBC, matrix box, gmx_bool bRemoveH,
517 t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
518 gmx_atomprop_t aps, gmx_bool bVerbose)
519 /* Read a pdb file. (containing proteins) */
521 int natom, new_natom, i;
524 printf("Reading %s...\n", inf);
525 get_stx_coordnum(inf, &natom);
526 init_t_atoms(atoms, natom, TRUE);
528 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
529 if (fn2ftp(inf) == efPDB)
531 get_pdb_atomnumber(atoms, aps);
536 for (i = 0; i < atoms->nr; i++)
538 if (!is_hydrogen(*atoms->atomname[i]))
540 atoms->atom[new_natom] = atoms->atom[i];
541 atoms->atomname[new_natom] = atoms->atomname[i];
542 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
543 copy_rvec((*x)[i], (*x)[new_natom]);
547 atoms->nr = new_natom;
552 if (title && title[0])
554 printf(" '%s',", title);
556 printf(" %d atoms\n", natom);
558 /* Rename residues */
559 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
560 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
561 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
563 rename_atoms("xlateat.dat", NULL,
564 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
573 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
579 void process_chain(t_atoms *pdba, rvec *x,
580 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
581 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
582 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
583 real angle, real distance, t_symtab *symtab,
584 int nrr, const rtprename_t *rr)
586 /* Rename aromatics, lys, asp and histidine */
589 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
593 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
597 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
601 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
605 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
609 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
613 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
617 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
621 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
625 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
630 set_histp(pdba, x, angle, distance);
634 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
637 /* Initialize the rtp builing block names with the residue names
638 * for the residues that have not been processed above.
640 pdbres_to_gmxrtp(pdba);
642 /* Now we have all rtp names set.
643 * The rtp names will conform to Gromacs naming,
644 * unless the input pdb file contained one or more force field specific
645 * rtp names as residue names.
649 /* struct for sorting the atoms from the pdb file */
651 int resnr; /* residue number */
652 int j; /* database order index */
653 int index; /* original atom number */
654 char anm1; /* second letter of atom name */
655 char altloc; /* alternate location indicator */
658 int pdbicomp(const void *a, const void *b)
663 pa = (t_pdbindex *)a;
664 pb = (t_pdbindex *)b;
666 d = (pa->resnr - pb->resnr);
672 d = (pa->anm1 - pb->anm1);
675 d = (pa->altloc - pb->altloc);
683 static void sort_pdbatoms(t_restp restp[],
684 int natoms, t_atoms **pdbaptr, rvec **x,
685 t_blocka *block, char ***gnames)
687 t_atoms *pdba, *pdbnew;
702 for (i = 0; i < natoms; i++)
704 atomnm = *pdba->atomname[i];
705 rptr = &restp[pdba->atom[i].resind];
706 for (j = 0; (j < rptr->natom); j++)
708 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
713 if (j == rptr->natom)
718 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
719 "while sorting atoms.\n%s", atomnm,
720 *pdba->resinfo[pdba->atom[i].resind].name,
721 pdba->resinfo[pdba->atom[i].resind].nr,
724 is_hydrogen(atomnm) ?
725 "\nFor a hydrogen, this can be a different protonation state, or it\n"
726 "might have had a different number in the PDB file and was rebuilt\n"
727 "(it might for instance have been H3, and we only expected H1 & H2).\n"
728 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
729 "Remove this hydrogen or choose a different protonation state to solve it.\n"
730 "Option -ignh will ignore all hydrogens in the input." : ".");
731 gmx_fatal(FARGS, buf);
733 /* make shadow array to be sorted into indexgroup */
734 pdbi[i].resnr = pdba->atom[i].resind;
737 pdbi[i].anm1 = atomnm[1];
738 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
740 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
742 /* pdba is sorted in pdbnew using the pdbi index */
745 init_t_atoms(pdbnew, natoms, TRUE);
747 pdbnew->nr = pdba->nr;
748 pdbnew->nres = pdba->nres;
749 sfree(pdbnew->resinfo);
750 pdbnew->resinfo = pdba->resinfo;
751 for (i = 0; i < natoms; i++)
753 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
754 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
755 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
756 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
757 /* make indexgroup in block */
758 a[i] = pdbi[i].index;
761 sfree(pdba->atomname);
763 sfree(pdba->pdbinfo);
766 /* copy the sorted pdbnew back to pdba */
769 add_grp(block, gnames, natoms, a, "prot_sort");
775 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
777 int i, j, oldnatoms, ndel;
780 printf("Checking for duplicate atoms....\n");
781 oldnatoms = pdba->nr;
783 /* NOTE: pdba->nr is modified inside the loop */
784 for (i = 1; (i < pdba->nr); i++)
786 /* compare 'i' and 'i-1', throw away 'i' if they are identical
787 this is a 'while' because multiple alternate locations can be present */
788 while ( (i < pdba->nr) &&
789 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
790 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
795 ri = &pdba->resinfo[pdba->atom[i].resind];
796 printf("deleting duplicate atom %4s %s%4d%c",
797 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
798 if (ri->chainid && (ri->chainid != ' '))
800 printf(" ch %c", ri->chainid);
804 if (pdba->pdbinfo[i].atomnr)
806 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
808 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
810 printf(" altloc %c", pdba->pdbinfo[i].altloc);
816 /* We can not free, since it might be in the symtab */
817 /* sfree(pdba->atomname[i]); */
818 for (j = i; j < pdba->nr; j++)
820 pdba->atom[j] = pdba->atom[j+1];
821 pdba->atomname[j] = pdba->atomname[j+1];
822 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
823 copy_rvec(x[j+1], x[j]);
825 srenew(pdba->atom, pdba->nr);
826 /* srenew(pdba->atomname, pdba->nr); */
827 srenew(pdba->pdbinfo, pdba->nr);
830 if (pdba->nr != oldnatoms)
832 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
838 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
841 const char *p_startrestype;
842 const char *p_restype;
843 int nstartwarn, nendwarn;
851 /* Find the starting terminus (typially N or 5') */
852 for (i = r0; i < r1 && *r_start == -1; i++)
854 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
855 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
857 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
864 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
868 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
876 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
877 for (i = *r_start; i < r1; i++)
879 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
880 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
888 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
889 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
890 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
894 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
903 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
909 modify_chain_numbers(t_atoms * pdba,
910 const char * chainsep)
913 char old_prev_chainid;
914 char old_this_chainid;
915 int old_prev_chainnum;
916 int old_this_chainnum;
922 const char * prev_atomname;
923 const char * this_atomname;
924 const char * prev_resname;
925 const char * this_resname;
930 int prev_chainnumber;
931 int this_chainnumber;
943 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
945 /* Be a bit flexible to catch typos */
946 if (!strncmp(chainsep, "id_o", 4))
948 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
949 splitting = SPLIT_ID_OR_TER;
950 printf("Splitting chemical chains based on TER records or chain id changing.\n");
952 else if (!strncmp(chainsep, "int", 3))
954 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
955 splitting = SPLIT_INTERACTIVE;
956 printf("Splitting chemical chains interactively.\n");
958 else if (!strncmp(chainsep, "id_a", 4))
960 splitting = SPLIT_ID_AND_TER;
961 printf("Splitting chemical chains based on TER records and chain id changing.\n");
963 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
965 splitting = SPLIT_ID_ONLY;
966 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
968 else if (chainsep[0] == 't')
970 splitting = SPLIT_TER_ONLY;
971 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
975 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
978 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
980 old_prev_chainid = '?';
981 old_prev_chainnum = -1;
984 this_atomname = NULL;
989 this_chainnumber = -1;
991 for (i = 0; i < pdba->nres; i++)
993 ri = &pdba->resinfo[i];
994 old_this_chainid = ri->chainid;
995 old_this_chainnum = ri->chainnum;
997 prev_atomname = this_atomname;
998 prev_atomnum = this_atomnum;
999 prev_resname = this_resname;
1000 prev_resnum = this_resnum;
1001 prev_chainid = this_chainid;
1002 prev_chainnumber = this_chainnumber;
1004 this_atomname = *(pdba->atomname[i]);
1005 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1006 this_resname = *ri->name;
1007 this_resnum = ri->nr;
1008 this_chainid = ri->chainid;
1009 this_chainnumber = ri->chainnum;
1013 case SPLIT_ID_OR_TER:
1014 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1020 case SPLIT_ID_AND_TER:
1021 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1028 if (old_this_chainid != old_prev_chainid)
1034 case SPLIT_TER_ONLY:
1035 if (old_this_chainnum != old_prev_chainnum)
1040 case SPLIT_INTERACTIVE:
1041 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1045 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1046 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1047 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1048 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1050 if (NULL == fgets(select, STRLEN-1, stdin))
1052 gmx_fatal(FARGS, "Error reading from stdin");
1055 if (i == 0 || select[0] == 'y')
1062 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1065 old_prev_chainid = old_this_chainid;
1066 old_prev_chainnum = old_this_chainnum;
1068 ri->chainnum = new_chainnum;
1097 int gmx_pdb2gmx(int argc, char *argv[])
1099 const char *desc[] = {
1100 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1101 "some database files, adds hydrogens to the molecules and generates",
1102 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1103 "and a topology in GROMACS format.",
1104 "These files can subsequently be processed to generate a run input file.",
1106 "[THISMODULE] will search for force fields by looking for",
1107 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1108 "of the current working directory and of the GROMACS library directory",
1109 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1111 "By default the forcefield selection is interactive,",
1112 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1113 "in the list on the command line instead. In that case [THISMODULE] just looks",
1114 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1116 "After choosing a force field, all files will be read only from",
1117 "the corresponding force field directory.",
1118 "If you want to modify or add a residue types, you can copy the force",
1119 "field directory from the GROMACS library directory to your current",
1120 "working directory. If you want to add new protein residue types,",
1121 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1122 "or copy the whole library directory to a local directory and set",
1123 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1124 "Check Chapter 5 of the manual for more information about file formats.",
1127 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1128 "need not necessarily contain a protein structure. Every kind of",
1129 "molecule for which there is support in the database can be converted.",
1130 "If there is no support in the database, you can add it yourself.[PAR]",
1132 "The program has limited intelligence, it reads a number of database",
1133 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1134 "if necessary this can be done manually. The program can prompt the",
1135 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1136 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1137 "protonated (three protons, default), for Asp and Glu unprotonated",
1138 "(default) or protonated, for His the proton can be either on ND1,",
1139 "on NE2 or on both. By default these selections are done automatically.",
1140 "For His, this is based on an optimal hydrogen bonding",
1141 "conformation. Hydrogen bonds are defined based on a simple geometric",
1142 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1143 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1144 "[TT]-dist[tt] respectively.[PAR]",
1146 "The protonation state of N- and C-termini can be chosen interactively",
1147 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1148 "respectively. Some force fields support zwitterionic forms for chains of",
1149 "one residue, but for polypeptides these options should NOT be selected.",
1150 "The AMBER force fields have unique forms for the terminal residues,",
1151 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1152 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1153 "respectively to use these forms, making sure you preserve the format",
1154 "of the coordinate file. Alternatively, use named terminating residues",
1155 "(e.g. ACE, NME).[PAR]",
1157 "The separation of chains is not entirely trivial since the markup",
1158 "in user-generated PDB files frequently varies and sometimes it",
1159 "is desirable to merge entries across a TER record, for instance",
1160 "if you want a disulfide bridge or distance restraints between",
1161 "two protein chains or if you have a HEME group bound to a protein.",
1162 "In such cases multiple chains should be contained in a single",
1163 "[TT]moleculetype[tt] definition.",
1164 "To handle this, [THISMODULE] uses two separate options.",
1165 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1166 "start, and termini added when applicable. This can be done based on the",
1167 "existence of TER records, when the chain id changes, or combinations of either",
1168 "or both of these. You can also do the selection fully interactively.",
1169 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1170 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1171 "This can be turned off (no merging), all non-water chains can be merged into a",
1172 "single molecule, or the selection can be done interactively.[PAR]",
1174 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1175 "If any of the occupancies are not one, indicating that the atom is",
1176 "not resolved well in the structure, a warning message is issued.",
1177 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1178 "all occupancy fields may be zero. Either way, it is up to the user",
1179 "to verify the correctness of the input data (read the article!).[PAR]",
1181 "During processing the atoms will be reordered according to GROMACS",
1182 "conventions. With [TT]-n[tt] an index file can be generated that",
1183 "contains one group reordered in the same way. This allows you to",
1184 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1185 "one limitation: reordering is done after the hydrogens are stripped",
1186 "from the input and before new hydrogens are added. This means that",
1187 "you should not use [TT]-ignh[tt].[PAR]",
1189 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1190 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1191 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1194 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1195 "motions. Angular and out-of-plane motions can be removed by changing",
1196 "hydrogens into virtual sites and fixing angles, which fixes their",
1197 "position relative to neighboring atoms. Additionally, all atoms in the",
1198 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1199 "can be converted into virtual sites, eliminating the fast improper dihedral",
1200 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1201 "atoms are also converted to virtual sites. The mass of all atoms that are",
1202 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1203 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1204 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1205 "done for water hydrogens to slow down the rotational motion of water.",
1206 "The increase in mass of the hydrogens is subtracted from the bonded",
1207 "(heavy) atom so that the total mass of the system remains the same."
1211 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1213 t_atoms pdba_all, *pdba;
1217 int chain, nch, maxch, nwaterchain;
1219 t_chain *chains, *cc;
1220 char select[STRLEN];
1228 int i, j, k, l, nrtp;
1229 int *swap_index, si;
1233 gpp_atomtype_t atype;
1234 gmx_residuetype_t rt;
1236 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1237 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1238 char *c, forcefield[STRLEN], ffdir[STRLEN];
1239 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1247 int nrtprename, naa;
1248 rtprename_t *rtprename = NULL;
1249 int nah, nNtdb, nCtdb, ntdblist;
1250 t_hackblock *ntdb, *ctdb, **tdblist;
1254 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1256 t_hackblock *hb_chain;
1257 t_restp *restp_chain;
1259 const char *p_restype;
1263 const char * prev_atomname;
1264 const char * this_atomname;
1265 const char * prev_resname;
1266 const char * this_resname;
1271 int prev_chainnumber;
1272 int this_chainnumber;
1274 int this_chainstart;
1275 int prev_chainstart;
1282 { efSTX, "-f", "eiwit.pdb", ffREAD },
1283 { efSTO, "-o", "conf", ffWRITE },
1284 { efTOP, NULL, NULL, ffWRITE },
1285 { efITP, "-i", "posre", ffWRITE },
1286 { efNDX, "-n", "clean", ffOPTWR },
1287 { efSTO, "-q", "clean.pdb", ffOPTWR }
1289 #define NFILE asize(fnm)
1292 /* Command line arguments must be static */
1293 static gmx_bool bNewRTP = FALSE;
1294 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1295 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1296 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1297 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1298 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1299 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1300 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1301 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1302 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1303 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1304 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1305 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1306 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1307 static const char *ff = "select";
1310 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1311 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1312 { "-lb", FALSE, etREAL, {&long_bond_dist},
1313 "HIDDENLong bond warning distance" },
1314 { "-sb", FALSE, etREAL, {&short_bond_dist},
1315 "HIDDENShort bond warning distance" },
1316 { "-chainsep", FALSE, etENUM, {chainsep},
1317 "Condition in PDB files when a new chain should be started (adding termini)" },
1318 { "-merge", FALSE, etENUM, {&merge},
1319 "Merge multiple chains into a single [moleculetype]" },
1320 { "-ff", FALSE, etSTR, {&ff},
1321 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1322 { "-water", FALSE, etENUM, {watstr},
1323 "Water model to use" },
1324 { "-inter", FALSE, etBOOL, {&bInter},
1325 "Set the next 8 options to interactive"},
1326 { "-ss", FALSE, etBOOL, {&bCysMan},
1327 "Interactive SS bridge selection" },
1328 { "-ter", FALSE, etBOOL, {&bTerMan},
1329 "Interactive termini selection, instead of charged (default)" },
1330 { "-lys", FALSE, etBOOL, {&bLysMan},
1331 "Interactive lysine selection, instead of charged" },
1332 { "-arg", FALSE, etBOOL, {&bArgMan},
1333 "Interactive arginine selection, instead of charged" },
1334 { "-asp", FALSE, etBOOL, {&bAspMan},
1335 "Interactive aspartic acid selection, instead of charged" },
1336 { "-glu", FALSE, etBOOL, {&bGluMan},
1337 "Interactive glutamic acid selection, instead of charged" },
1338 { "-gln", FALSE, etBOOL, {&bGlnMan},
1339 "Interactive glutamine selection, instead of neutral" },
1340 { "-his", FALSE, etBOOL, {&bHisMan},
1341 "Interactive histidine selection, instead of checking H-bonds" },
1342 { "-angle", FALSE, etREAL, {&angle},
1343 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1344 { "-dist", FALSE, etREAL, {&distance},
1345 "Maximum donor-acceptor distance for a H-bond (nm)" },
1346 { "-una", FALSE, etBOOL, {&bUnA},
1347 "Select aromatic rings with united CH atoms on phenylalanine, "
1348 "tryptophane and tyrosine" },
1349 { "-sort", FALSE, etBOOL, {&bSort},
1350 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1351 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1352 "Ignore hydrogen atoms that are in the coordinate file" },
1353 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1354 "Continue when atoms are missing, dangerous" },
1355 { "-v", FALSE, etBOOL, {&bVerbose},
1356 "Be slightly more verbose in messages" },
1357 { "-posrefc", FALSE, etREAL, {&posre_fc},
1358 "Force constant for position restraints" },
1359 { "-vsite", FALSE, etENUM, {vsitestr},
1360 "Convert atoms to virtual sites" },
1361 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1362 "Make hydrogen atoms heavy" },
1363 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1364 "Change the mass of hydrogens to 2 amu" },
1365 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1366 "Use charge groups in the [TT].rtp[tt] file" },
1367 { "-cmap", TRUE, etBOOL, {&bCmap},
1368 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1369 { "-renum", TRUE, etBOOL, {&bRenumRes},
1370 "Renumber the residues consecutively in the output" },
1371 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1372 "Use [TT].rtp[tt] entry names as residue names" }
1374 #define NPARGS asize(pa)
1376 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1382 /* Force field selection, interactive or direct */
1383 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1384 forcefield, sizeof(forcefield),
1385 ffdir, sizeof(ffdir));
1387 if (strlen(forcefield) > 0)
1389 strcpy(ffname, forcefield);
1390 ffname[0] = toupper(ffname[0]);
1394 gmx_fatal(FARGS, "Empty forcefield string");
1397 printf("\nUsing the %s force field in directory %s\n\n",
1400 choose_watermodel(watstr[0], ffdir, &watermodel);
1404 /* if anything changes here, also change description of -inter */
1419 else if (bDeuterate)
1428 switch (vsitestr[0][0])
1430 case 'n': /* none */
1432 bVsiteAromatics = FALSE;
1434 case 'h': /* hydrogens */
1436 bVsiteAromatics = FALSE;
1438 case 'a': /* aromatics */
1440 bVsiteAromatics = TRUE;
1443 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1444 __FILE__, __LINE__, vsitestr[0]);
1447 /* Open the symbol table */
1448 open_symtab(&symtab);
1450 /* Residue type database */
1451 gmx_residuetype_init(&rt);
1453 /* Read residue renaming database(s), if present */
1454 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1458 for (i = 0; i < nrrn; i++)
1460 fp = fflib_open(rrn[i]);
1461 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1467 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1469 for (i = 0; i < nrtprename; i++)
1471 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1473 /* Only add names if the 'standard' gromacs/iupac base name was found */
1476 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1477 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1478 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1479 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1484 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1485 strstr(watermodel, "4P")))
1489 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1490 strstr(watermodel, "5P")))
1499 aps = gmx_atomprop_init();
1500 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1501 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1506 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1509 printf("Analyzing pdb file\n");
1514 modify_chain_numbers(&pdba_all, chainsep[0]);
1518 this_atomname = NULL;
1520 this_resname = NULL;
1523 this_chainnumber = -1;
1524 this_chainstart = 0;
1525 /* Keep the compiler happy */
1526 prev_chainstart = 0;
1531 for (i = 0; (i < natom); i++)
1533 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1535 prev_atomname = this_atomname;
1536 prev_atomnum = this_atomnum;
1537 prev_resname = this_resname;
1538 prev_resnum = this_resnum;
1539 prev_chainid = this_chainid;
1540 prev_chainnumber = this_chainnumber;
1543 prev_chainstart = this_chainstart;
1546 this_atomname = *pdba_all.atomname[i];
1547 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1548 this_resname = *ri->name;
1549 this_resnum = ri->nr;
1550 this_chainid = ri->chainid;
1551 this_chainnumber = ri->chainnum;
1553 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1554 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1556 this_chainstart = pdba_all.atom[i].resind;
1561 if (!strncmp(merge[0], "int", 3))
1563 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1564 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1565 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1566 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1568 if (NULL == fgets(select, STRLEN-1, stdin))
1570 gmx_fatal(FARGS, "Error reading from stdin");
1572 bMerged = (select[0] == 'y');
1574 else if (!strncmp(merge[0], "all", 3))
1582 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1583 pdba_all.atom[i].resind - prev_chainstart;
1584 pdb_ch[nch-1].nterpairs++;
1585 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1590 /* set natom for previous chain */
1593 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1600 /* check if chain identifier was used before */
1601 for (j = 0; (j < nch); j++)
1603 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1605 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1606 "They will be treated as separate chains unless you reorder your file.\n",
1613 srenew(pdb_ch, maxch);
1615 pdb_ch[nch].chainid = ri->chainid;
1616 pdb_ch[nch].chainnum = ri->chainnum;
1617 pdb_ch[nch].start = i;
1618 pdb_ch[nch].bAllWat = bWat;
1621 pdb_ch[nch].nterpairs = 0;
1625 pdb_ch[nch].nterpairs = 1;
1627 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1628 /* modified [nch] to [0] below */
1629 pdb_ch[nch].chainstart[0] = 0;
1635 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1637 /* set all the water blocks at the end of the chain */
1638 snew(swap_index, nch);
1640 for (i = 0; i < nch; i++)
1642 if (!pdb_ch[i].bAllWat)
1648 for (i = 0; i < nch; i++)
1650 if (pdb_ch[i].bAllWat)
1656 if (nwaterchain > 1)
1658 printf("Moved all the water blocks to the end\n");
1662 /* copy pdb data and x for all chains */
1663 for (i = 0; (i < nch); i++)
1666 chains[i].chainid = pdb_ch[si].chainid;
1667 chains[i].chainnum = pdb_ch[si].chainnum;
1668 chains[i].bAllWat = pdb_ch[si].bAllWat;
1669 chains[i].nterpairs = pdb_ch[si].nterpairs;
1670 chains[i].chainstart = pdb_ch[si].chainstart;
1671 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1672 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1673 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1674 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1676 snew(chains[i].pdba, 1);
1677 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1678 snew(chains[i].x, chains[i].pdba->nr);
1679 for (j = 0; j < chains[i].pdba->nr; j++)
1681 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1682 snew(chains[i].pdba->atomname[j], 1);
1683 *chains[i].pdba->atomname[j] =
1684 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1685 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1686 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1688 /* Re-index the residues assuming that the indices are continuous */
1689 k = chains[i].pdba->atom[0].resind;
1690 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1691 chains[i].pdba->nres = nres;
1692 for (j = 0; j < chains[i].pdba->nr; j++)
1694 chains[i].pdba->atom[j].resind -= k;
1696 srenew(chains[i].pdba->resinfo, nres);
1697 for (j = 0; j < nres; j++)
1699 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1700 snew(chains[i].pdba->resinfo[j].name, 1);
1701 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1702 /* make all chain identifiers equal to that of the chain */
1703 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1707 if (nchainmerges > 0)
1709 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1713 printf("There are %d chains and %d blocks of water and "
1714 "%d residues with %d atoms\n",
1715 nch-nwaterchain, nwaterchain,
1716 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr, natom);
1718 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1719 for (i = 0; (i < nch); i++)
1721 printf(" %d '%c' %5d %6d %s\n",
1722 i+1, chains[i].chainid ? chains[i].chainid : '-',
1723 chains[i].pdba->nres, chains[i].pdba->nr,
1724 chains[i].bAllWat ? "(only water)" : "");
1728 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1730 /* Read atomtypes... */
1731 atype = read_atype(ffdir, &symtab);
1733 /* read residue database */
1734 printf("Reading residue database... (%s)\n", forcefield);
1735 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1738 for (i = 0; i < nrtpf; i++)
1740 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1746 /* Not correct with multiple rtp input files with different bonded types */
1747 fp = gmx_fio_fopen("new.rtp", "w");
1748 print_resall(fp, nrtp, restp, atype);
1752 /* read hydrogen database */
1753 nah = read_h_db(ffdir, &ah);
1755 /* Read Termini database... */
1756 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1757 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1759 top_fn = ftp2fn(efTOP, NFILE, fnm);
1760 top_file = gmx_fio_fopen(top_fn, "w");
1762 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1769 for (chain = 0; (chain < nch); chain++)
1771 cc = &(chains[chain]);
1773 /* set pdba, natom and nres to the current chain */
1776 natom = cc->pdba->nr;
1777 nres = cc->pdba->nres;
1779 if (cc->chainid && ( cc->chainid != ' ' ) )
1781 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1782 chain+1, cc->chainid, natom, nres);
1786 printf("Processing chain %d (%d atoms, %d residues)\n",
1787 chain+1, natom, nres);
1790 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1791 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1792 nrtprename, rtprename);
1794 cc->chainstart[cc->nterpairs] = pdba->nres;
1796 for (i = 0; i < cc->nterpairs; i++)
1798 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1799 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1801 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1807 if (cc->nterpairs == 0)
1809 printf("Problem with chain definition, or missing terminal residues.\n"
1810 "This chain does not appear to contain a recognized chain molecule.\n"
1811 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1814 /* Check for disulfides and other special bonds */
1815 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1819 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1827 sprintf(fn, "chain.pdb");
1831 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1833 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1837 for (i = 0; i < cc->nterpairs; i++)
1841 * We first apply a filter so we only have the
1842 * termini that can be applied to the residue in question
1843 * (or a generic terminus if no-residue specific is available).
1845 /* First the N terminus */
1848 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1849 *pdba->resinfo[cc->r_start[i]].name,
1850 *pdba->resinfo[cc->r_start[i]].rtp,
1854 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1855 "is already in a terminus-specific form and skipping terminus selection.\n");
1860 if (bTerMan && ntdblist > 1)
1862 sprintf(select, "Select start terminus type for %s-%d",
1863 *pdba->resinfo[cc->r_start[i]].name,
1864 pdba->resinfo[cc->r_start[i]].nr);
1865 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1869 cc->ntdb[i] = tdblist[0];
1872 printf("Start terminus %s-%d: %s\n",
1873 *pdba->resinfo[cc->r_start[i]].name,
1874 pdba->resinfo[cc->r_start[i]].nr,
1875 (cc->ntdb[i])->name);
1884 /* And the C terminus */
1887 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1888 *pdba->resinfo[cc->r_end[i]].name,
1889 *pdba->resinfo[cc->r_end[i]].rtp,
1893 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1894 "is already in a terminus-specific form and skipping terminus selection.\n");
1899 if (bTerMan && ntdblist > 1)
1901 sprintf(select, "Select end terminus type for %s-%d",
1902 *pdba->resinfo[cc->r_end[i]].name,
1903 pdba->resinfo[cc->r_end[i]].nr);
1904 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1908 cc->ctdb[i] = tdblist[0];
1910 printf("End terminus %s-%d: %s\n",
1911 *pdba->resinfo[cc->r_end[i]].name,
1912 pdba->resinfo[cc->r_end[i]].nr,
1913 (cc->ctdb[i])->name);
1922 /* lookup hackblocks and rtp for all residues */
1923 get_hackblocks_rtp(&hb_chain, &restp_chain,
1924 nrtp, restp, pdba->nres, pdba->resinfo,
1925 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1926 /* ideally, now we would not need the rtp itself anymore, but do
1927 everything using the hb and restp arrays. Unfortunately, that
1928 requires some re-thinking of code in gen_vsite.c, which I won't
1929 do now :( AF 26-7-99 */
1931 rename_atoms(NULL, ffdir,
1932 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1934 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1938 block = new_blocka();
1940 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1941 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1942 if (ftp2bSet(efNDX, NFILE, fnm))
1946 fprintf(stderr, "WARNING: with the -remh option the generated "
1947 "index file (%s) might be useless\n"
1948 "(the index file is generated before hydrogens are added)",
1949 ftp2fn(efNDX, NFILE, fnm));
1951 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1953 for (i = 0; i < block->nr; i++)
1962 fprintf(stderr, "WARNING: "
1963 "without sorting no check for duplicate atoms can be done\n");
1966 /* Generate Hydrogen atoms (and termini) in the sequence */
1967 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1968 natom = add_h(&pdba, &x, nah, ah,
1969 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1970 NULL, NULL, TRUE, FALSE);
1971 printf("Now there are %d residues with %d atoms\n",
1972 pdba->nres, pdba->nr);
1975 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1980 for (i = 0; (i < natom); i++)
1982 fprintf(debug, "Res %s%d atom %d %s\n",
1983 *(pdba->resinfo[pdba->atom[i].resind].name),
1984 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1988 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1990 /* make up molecule name(s) */
1992 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1994 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2000 sprintf(molname, "Water");
2004 this_chainid = cc->chainid;
2006 /* Add the chain id if we have one */
2007 if (this_chainid != ' ')
2009 sprintf(buf, "_chain_%c", this_chainid);
2010 strcat(suffix, buf);
2013 /* Check if there have been previous chains with the same id */
2015 for (k = 0; k < chain; k++)
2017 if (cc->chainid == chains[k].chainid)
2022 /* Add the number for this chain identifier if there are multiple copies */
2026 sprintf(buf, "%d", nid_used+1);
2027 strcat(suffix, buf);
2030 if (strlen(suffix) > 0)
2032 sprintf(molname, "%s%s", p_restype, suffix);
2036 strcpy(molname, p_restype);
2040 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2043 strcpy(itp_fn, top_fn);
2044 printf("Chain time...\n");
2045 c = strrchr(itp_fn, '.');
2046 sprintf(c, "_%s.itp", molname);
2047 c = strrchr(posre_fn, '.');
2048 sprintf(c, "_%s.itp", molname);
2049 if (strcmp(itp_fn, posre_fn) == 0)
2051 strcpy(buf_fn, posre_fn);
2052 c = strrchr(buf_fn, '.');
2054 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2058 srenew(incls, nincl);
2059 incls[nincl-1] = strdup(itp_fn);
2060 itp_file = gmx_fio_fopen(itp_fn, "w");
2067 srenew(mols, nmol+1);
2070 mols[nmol].name = strdup("SOL");
2071 mols[nmol].nr = pdba->nres;
2075 mols[nmol].name = strdup(molname);
2082 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2092 top_file2 = itp_file;
2096 top_file2 = top_file;
2099 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2101 restp_chain, hb_chain,
2103 bVsites, bVsiteAromatics, ffdir,
2104 mHmult, nssbonds, ssbonds,
2105 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2106 bRenumRes, bRTPresname);
2110 write_posres(posre_fn, pdba, posre_fc);
2115 gmx_fio_fclose(itp_file);
2118 /* pdba and natom have been reassigned somewhere so: */
2124 if (cc->chainid == ' ')
2126 sprintf(fn, "chain.pdb");
2130 sprintf(fn, "chain_%c.pdb", cc->chainid);
2132 cool_quote(quote, 255, NULL);
2133 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2137 if (watermodel == NULL)
2139 for (chain = 0; chain < nch; chain++)
2141 if (chains[chain].bAllWat)
2143 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2149 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2150 if (!fflib_fexist(buf_fn))
2152 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2153 buf_fn, watermodel);
2157 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2158 gmx_fio_fclose(top_file);
2160 gmx_residuetype_destroy(rt);
2162 /* now merge all chains back together */
2165 for (i = 0; (i < nch); i++)
2167 natom += chains[i].pdba->nr;
2168 nres += chains[i].pdba->nres;
2171 init_t_atoms(atoms, natom, FALSE);
2172 for (i = 0; i < atoms->nres; i++)
2174 sfree(atoms->resinfo[i].name);
2176 sfree(atoms->resinfo);
2178 snew(atoms->resinfo, nres);
2182 for (i = 0; (i < nch); i++)
2186 printf("Including chain %d in system: %d atoms %d residues\n",
2187 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2189 for (j = 0; (j < chains[i].pdba->nr); j++)
2191 atoms->atom[k] = chains[i].pdba->atom[j];
2192 atoms->atom[k].resind += l; /* l is processed nr of residues */
2193 atoms->atomname[k] = chains[i].pdba->atomname[j];
2194 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2195 copy_rvec(chains[i].x[j], x[k]);
2198 for (j = 0; (j < chains[i].pdba->nres); j++)
2200 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2203 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2211 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2212 print_sums(atoms, TRUE);
2215 fprintf(stderr, "\nWriting coordinate file...\n");
2216 clear_rvec(box_space);
2219 make_new_box(atoms->nr, x, box, box_space, FALSE);
2221 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2223 printf("\t\t--------- PLEASE NOTE ------------\n");
2224 printf("You have successfully generated a topology from: %s.\n",
2225 opt2fn("-f", NFILE, fnm));
2226 if (watermodel != NULL)
2228 printf("The %s force field and the %s water model are used.\n",
2229 ffname, watermodel);
2233 printf("The %s force field is used.\n",
2236 printf("\t\t--------- ETON ESAELP ------------\n");