2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/utility/cstringutil.h"
50 #include "gromacs/fileio/confio.h"
51 #include "gromacs/math/vec.h"
52 #include "gromacs/utility/futil.h"
53 #include "gromacs/fileio/pdbio.h"
60 #include "gromacs/gmxlib/conformation-utilities.h"
63 #include "gromacs/topology/index.h"
64 #include "fflibutil.h"
67 #include "gromacs/commandline/pargs.h"
68 #include "gromacs/fileio/strdb.h"
69 #include "gromacs/topology/atomprop.h"
70 #include "gromacs/topology/block.h"
71 #include "gromacs/topology/index.h"
72 #include "gromacs/topology/residuetypes.h"
73 #include "gromacs/topology/symtab.h"
74 #include "gromacs/utility/smalloc.h"
75 #include "gromacs/utility/fatalerror.h"
90 static const char *res2bb_notermini(const char *name,
91 int nrr, const rtprename_t *rr)
93 /* NOTE: This function returns the main building block name,
94 * it does not take terminal renaming into account.
99 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
104 return (i < nrr ? rr[i].main : name);
107 static const char *select_res(int nr, int resnr,
108 const char *name[], const char *expl[],
110 int nrr, const rtprename_t *rr)
114 printf("Which %s type do you want for residue %d\n", title, resnr+1);
115 for (sel = 0; (sel < nr); sel++)
117 printf("%d. %s (%s)\n",
118 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
120 printf("\nType a number:"); fflush(stdout);
122 if (scanf("%d", &sel) != 1)
124 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
130 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
135 const char *lh[easpNR] = { "ASP", "ASPH" };
136 const char *expl[easpNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
144 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
149 const char *lh[egluNR] = { "GLU", "GLUH" };
150 const char *expl[egluNR] = {
151 "Not protonated (charge -1)",
152 "Protonated (charge 0)"
155 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
158 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
163 const char *lh[eglnNR] = { "GLN", "QLN" };
164 const char *expl[eglnNR] = {
165 "Not protonated (charge 0)",
166 "Protonated (charge +1)"
169 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
172 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
177 const char *lh[elysNR] = { "LYSN", "LYS" };
178 const char *expl[elysNR] = {
179 "Not protonated (charge 0)",
180 "Protonated (charge +1)"
183 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
186 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
191 const char *lh[eargNR] = { "ARGN", "ARG" };
192 const char *expl[eargNR] = {
193 "Not protonated (charge 0)",
194 "Protonated (charge +1)"
197 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
200 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
202 const char *expl[ehisNR] = {
209 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
212 static void read_rtprename(const char *fname, FILE *fp,
213 int *nrtprename, rtprename_t **rtprename)
215 char line[STRLEN], buf[STRLEN];
224 while (get_a_line(fp, line, STRLEN))
227 nc = sscanf(line, "%s %s %s %s %s %s",
228 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
231 if (nc != 2 && nc != 5)
233 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
239 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
244 /* This file does not have special termini names, copy them from main */
245 strcpy(rr[n].nter, rr[n].main);
246 strcpy(rr[n].cter, rr[n].main);
247 strcpy(rr[n].bter, rr[n].main);
257 static char *search_resrename(int nrr, rtprename_t *rr,
259 gmx_bool bStart, gmx_bool bEnd,
260 gmx_bool bCompareFFRTPname)
268 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
269 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
274 /* If found in the database, rename this residue's rtp building block,
275 * otherwise keep the old name.
298 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
306 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
307 int nrr, rtprename_t *rr, t_symtab *symtab,
311 gmx_bool bStart, bEnd;
313 gmx_bool bFFRTPTERRNM;
315 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
317 for (r = 0; r < pdba->nres; r++)
321 for (j = 0; j < nterpairs; j++)
328 for (j = 0; j < nterpairs; j++)
336 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
338 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
340 /* This is a terminal residue, but the residue name,
341 * currently stored in .rtp, is not a standard residue name,
342 * but probably a force field specific rtp name.
343 * Check if we need to rename it because it is terminal.
345 nn = search_resrename(nrr, rr,
346 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
349 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
353 printf("Changing rtp entry of residue %d %s to '%s'\n",
354 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
356 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
361 static void pdbres_to_gmxrtp(t_atoms *pdba)
365 for (i = 0; (i < pdba->nres); i++)
367 if (pdba->resinfo[i].rtp == NULL)
369 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
374 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
375 gmx_bool bFullCompare, t_symtab *symtab)
380 for (i = 0; (i < pdba->nres); i++)
382 resnm = *pdba->resinfo[i].name;
383 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
384 (!bFullCompare && strstr(resnm, oldnm) != NULL))
386 /* Rename the residue name (not the rtp name) */
387 pdba->resinfo[i].name = put_symtab(symtab, newnm);
392 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
393 gmx_bool bFullCompare, t_symtab *symtab)
398 for (i = 0; (i < pdba->nres); i++)
400 /* We have not set the rtp name yes, use the residue name */
401 bbnm = *pdba->resinfo[i].name;
402 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
403 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
405 /* Change the rtp builing block name */
406 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
411 static void rename_bbint(t_atoms *pdba, const char *oldnm,
412 const char *gettp(int, int, const rtprename_t *),
413 gmx_bool bFullCompare,
415 int nrr, const rtprename_t *rr)
421 for (i = 0; i < pdba->nres; i++)
423 /* We have not set the rtp name yes, use the residue name */
424 bbnm = *pdba->resinfo[i].name;
425 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
426 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
428 ptr = gettp(i, nrr, rr);
429 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
434 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
440 ftp = fn2ftp(filename);
441 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
443 fprintf(stderr, "No occupancies in %s\n", filename);
447 for (i = 0; (i < atoms->nr); i++)
449 if (atoms->pdbinfo[i].occup != 1)
453 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
454 *atoms->resinfo[atoms->atom[i].resind].name,
455 atoms->resinfo[ atoms->atom[i].resind].nr,
457 atoms->pdbinfo[i].occup);
459 if (atoms->pdbinfo[i].occup == 0)
469 if (nzero == atoms->nr)
471 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
473 else if ((nzero > 0) || (nnotone > 0))
477 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
478 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
480 nzero, nnotone, atoms->nr);
484 fprintf(stderr, "All occupancies are one\n");
489 void write_posres(char *fn, t_atoms *pdba, real fc)
494 fp = gmx_fio_fopen(fn, "w");
496 "; In this topology include file, you will find position restraint\n"
497 "; entries for all the heavy atoms in your original pdb file.\n"
498 "; This means that all the protons which were added by pdb2gmx are\n"
499 "; not restrained.\n"
501 "[ position_restraints ]\n"
502 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
504 for (i = 0; (i < pdba->nr); i++)
506 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
508 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
514 static int read_pdball(const char *inf, const char *outf, char *title,
515 t_atoms *atoms, rvec **x,
516 int *ePBC, matrix box, gmx_bool bRemoveH,
517 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
518 gmx_atomprop_t aps, gmx_bool bVerbose)
519 /* Read a pdb file. (containing proteins) */
521 int natom, new_natom, i;
524 printf("Reading %s...\n", inf);
525 get_stx_coordnum(inf, &natom);
526 init_t_atoms(atoms, natom, TRUE);
528 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
529 if (fn2ftp(inf) == efPDB)
531 get_pdb_atomnumber(atoms, aps);
536 for (i = 0; i < atoms->nr; i++)
538 if (!is_hydrogen(*atoms->atomname[i]))
540 atoms->atom[new_natom] = atoms->atom[i];
541 atoms->atomname[new_natom] = atoms->atomname[i];
542 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
543 copy_rvec((*x)[i], (*x)[new_natom]);
547 atoms->nr = new_natom;
552 if (title && title[0])
554 printf(" '%s',", title);
556 printf(" %d atoms\n", natom);
558 /* Rename residues */
559 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
560 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
561 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
563 rename_atoms("xlateat.dat", NULL,
564 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
573 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
579 void process_chain(t_atoms *pdba, rvec *x,
580 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
581 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
582 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
583 real angle, real distance, t_symtab *symtab,
584 int nrr, const rtprename_t *rr)
586 /* Rename aromatics, lys, asp and histidine */
589 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
593 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
597 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
601 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
605 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
609 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
613 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
617 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
621 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
625 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
630 set_histp(pdba, x, angle, distance);
634 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
637 /* Initialize the rtp builing block names with the residue names
638 * for the residues that have not been processed above.
640 pdbres_to_gmxrtp(pdba);
642 /* Now we have all rtp names set.
643 * The rtp names will conform to Gromacs naming,
644 * unless the input pdb file contained one or more force field specific
645 * rtp names as residue names.
649 /* struct for sorting the atoms from the pdb file */
651 int resnr; /* residue number */
652 int j; /* database order index */
653 int index; /* original atom number */
654 char anm1; /* second letter of atom name */
655 char altloc; /* alternate location indicator */
658 int pdbicomp(const void *a, const void *b)
663 pa = (t_pdbindex *)a;
664 pb = (t_pdbindex *)b;
666 d = (pa->resnr - pb->resnr);
672 d = (pa->anm1 - pb->anm1);
675 d = (pa->altloc - pb->altloc);
683 static void sort_pdbatoms(t_restp restp[],
684 int natoms, t_atoms **pdbaptr, rvec **x,
685 t_blocka *block, char ***gnames)
687 t_atoms *pdba, *pdbnew;
702 for (i = 0; i < natoms; i++)
704 atomnm = *pdba->atomname[i];
705 rptr = &restp[pdba->atom[i].resind];
706 for (j = 0; (j < rptr->natom); j++)
708 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
713 if (j == rptr->natom)
718 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
719 "while sorting atoms.\n%s", atomnm,
720 *pdba->resinfo[pdba->atom[i].resind].name,
721 pdba->resinfo[pdba->atom[i].resind].nr,
724 is_hydrogen(atomnm) ?
725 "\nFor a hydrogen, this can be a different protonation state, or it\n"
726 "might have had a different number in the PDB file and was rebuilt\n"
727 "(it might for instance have been H3, and we only expected H1 & H2).\n"
728 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
729 "Remove this hydrogen or choose a different protonation state to solve it.\n"
730 "Option -ignh will ignore all hydrogens in the input." : ".");
731 gmx_fatal(FARGS, buf);
733 /* make shadow array to be sorted into indexgroup */
734 pdbi[i].resnr = pdba->atom[i].resind;
737 pdbi[i].anm1 = atomnm[1];
738 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
740 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
742 /* pdba is sorted in pdbnew using the pdbi index */
745 init_t_atoms(pdbnew, natoms, TRUE);
747 pdbnew->nr = pdba->nr;
748 pdbnew->nres = pdba->nres;
749 sfree(pdbnew->resinfo);
750 pdbnew->resinfo = pdba->resinfo;
751 for (i = 0; i < natoms; i++)
753 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
754 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
755 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
756 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
757 /* make indexgroup in block */
758 a[i] = pdbi[i].index;
761 sfree(pdba->atomname);
763 sfree(pdba->pdbinfo);
766 /* copy the sorted pdbnew back to pdba */
769 add_grp(block, gnames, natoms, a, "prot_sort");
775 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
777 int i, j, oldnatoms, ndel;
780 printf("Checking for duplicate atoms....\n");
781 oldnatoms = pdba->nr;
783 /* NOTE: pdba->nr is modified inside the loop */
784 for (i = 1; (i < pdba->nr); i++)
786 /* compare 'i' and 'i-1', throw away 'i' if they are identical
787 this is a 'while' because multiple alternate locations can be present */
788 while ( (i < pdba->nr) &&
789 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
790 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
795 ri = &pdba->resinfo[pdba->atom[i].resind];
796 printf("deleting duplicate atom %4s %s%4d%c",
797 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
798 if (ri->chainid && (ri->chainid != ' '))
800 printf(" ch %c", ri->chainid);
804 if (pdba->pdbinfo[i].atomnr)
806 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
808 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
810 printf(" altloc %c", pdba->pdbinfo[i].altloc);
816 /* We can not free, since it might be in the symtab */
817 /* sfree(pdba->atomname[i]); */
818 for (j = i; j < pdba->nr; j++)
820 pdba->atom[j] = pdba->atom[j+1];
821 pdba->atomname[j] = pdba->atomname[j+1];
822 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
823 copy_rvec(x[j+1], x[j]);
825 srenew(pdba->atom, pdba->nr);
826 /* srenew(pdba->atomname, pdba->nr); */
827 srenew(pdba->pdbinfo, pdba->nr);
830 if (pdba->nr != oldnatoms)
832 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
838 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
839 gmx_residuetype_t *rt)
842 const char *p_startrestype;
843 const char *p_restype;
844 int nstartwarn, nendwarn;
852 /* Find the starting terminus (typially N or 5') */
853 for (i = r0; i < r1 && *r_start == -1; i++)
855 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
856 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
858 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
865 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
869 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
877 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
878 for (i = *r_start; i < r1; i++)
880 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
881 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
889 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
890 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
891 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
895 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
904 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
910 modify_chain_numbers(t_atoms * pdba,
911 const char * chainsep)
914 char old_prev_chainid;
915 char old_this_chainid;
916 int old_prev_chainnum;
917 int old_this_chainnum;
923 const char * prev_atomname;
924 const char * this_atomname;
925 const char * prev_resname;
926 const char * this_resname;
931 int prev_chainnumber;
932 int this_chainnumber;
944 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
946 /* Be a bit flexible to catch typos */
947 if (!strncmp(chainsep, "id_o", 4))
949 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
950 splitting = SPLIT_ID_OR_TER;
951 printf("Splitting chemical chains based on TER records or chain id changing.\n");
953 else if (!strncmp(chainsep, "int", 3))
955 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
956 splitting = SPLIT_INTERACTIVE;
957 printf("Splitting chemical chains interactively.\n");
959 else if (!strncmp(chainsep, "id_a", 4))
961 splitting = SPLIT_ID_AND_TER;
962 printf("Splitting chemical chains based on TER records and chain id changing.\n");
964 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
966 splitting = SPLIT_ID_ONLY;
967 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
969 else if (chainsep[0] == 't')
971 splitting = SPLIT_TER_ONLY;
972 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
976 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
979 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
981 old_prev_chainid = '?';
982 old_prev_chainnum = -1;
985 this_atomname = NULL;
990 this_chainnumber = -1;
992 for (i = 0; i < pdba->nres; i++)
994 ri = &pdba->resinfo[i];
995 old_this_chainid = ri->chainid;
996 old_this_chainnum = ri->chainnum;
998 prev_atomname = this_atomname;
999 prev_atomnum = this_atomnum;
1000 prev_resname = this_resname;
1001 prev_resnum = this_resnum;
1002 prev_chainid = this_chainid;
1003 prev_chainnumber = this_chainnumber;
1005 this_atomname = *(pdba->atomname[i]);
1006 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1007 this_resname = *ri->name;
1008 this_resnum = ri->nr;
1009 this_chainid = ri->chainid;
1010 this_chainnumber = ri->chainnum;
1014 case SPLIT_ID_OR_TER:
1015 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1021 case SPLIT_ID_AND_TER:
1022 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1029 if (old_this_chainid != old_prev_chainid)
1035 case SPLIT_TER_ONLY:
1036 if (old_this_chainnum != old_prev_chainnum)
1041 case SPLIT_INTERACTIVE:
1042 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1046 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1047 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1048 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1049 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1051 if (NULL == fgets(select, STRLEN-1, stdin))
1053 gmx_fatal(FARGS, "Error reading from stdin");
1056 if (i == 0 || select[0] == 'y')
1063 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1066 old_prev_chainid = old_this_chainid;
1067 old_prev_chainnum = old_this_chainnum;
1069 ri->chainnum = new_chainnum;
1098 int gmx_pdb2gmx(int argc, char *argv[])
1100 const char *desc[] = {
1101 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1102 "some database files, adds hydrogens to the molecules and generates",
1103 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1104 "and a topology in GROMACS format.",
1105 "These files can subsequently be processed to generate a run input file.",
1107 "[THISMODULE] will search for force fields by looking for",
1108 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1109 "of the current working directory and of the GROMACS library directory",
1110 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1112 "By default the forcefield selection is interactive,",
1113 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1114 "in the list on the command line instead. In that case [THISMODULE] just looks",
1115 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1117 "After choosing a force field, all files will be read only from",
1118 "the corresponding force field directory.",
1119 "If you want to modify or add a residue types, you can copy the force",
1120 "field directory from the GROMACS library directory to your current",
1121 "working directory. If you want to add new protein residue types,",
1122 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1123 "or copy the whole library directory to a local directory and set",
1124 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1125 "Check Chapter 5 of the manual for more information about file formats.",
1128 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1129 "need not necessarily contain a protein structure. Every kind of",
1130 "molecule for which there is support in the database can be converted.",
1131 "If there is no support in the database, you can add it yourself.[PAR]",
1133 "The program has limited intelligence, it reads a number of database",
1134 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1135 "if necessary this can be done manually. The program can prompt the",
1136 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1137 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1138 "protonated (three protons, default), for Asp and Glu unprotonated",
1139 "(default) or protonated, for His the proton can be either on ND1,",
1140 "on NE2 or on both. By default these selections are done automatically.",
1141 "For His, this is based on an optimal hydrogen bonding",
1142 "conformation. Hydrogen bonds are defined based on a simple geometric",
1143 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1144 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1145 "[TT]-dist[tt] respectively.[PAR]",
1147 "The protonation state of N- and C-termini can be chosen interactively",
1148 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1149 "respectively. Some force fields support zwitterionic forms for chains of",
1150 "one residue, but for polypeptides these options should NOT be selected.",
1151 "The AMBER force fields have unique forms for the terminal residues,",
1152 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1153 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1154 "respectively to use these forms, making sure you preserve the format",
1155 "of the coordinate file. Alternatively, use named terminating residues",
1156 "(e.g. ACE, NME).[PAR]",
1158 "The separation of chains is not entirely trivial since the markup",
1159 "in user-generated PDB files frequently varies and sometimes it",
1160 "is desirable to merge entries across a TER record, for instance",
1161 "if you want a disulfide bridge or distance restraints between",
1162 "two protein chains or if you have a HEME group bound to a protein.",
1163 "In such cases multiple chains should be contained in a single",
1164 "[TT]moleculetype[tt] definition.",
1165 "To handle this, [THISMODULE] uses two separate options.",
1166 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1167 "start, and termini added when applicable. This can be done based on the",
1168 "existence of TER records, when the chain id changes, or combinations of either",
1169 "or both of these. You can also do the selection fully interactively.",
1170 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1171 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1172 "This can be turned off (no merging), all non-water chains can be merged into a",
1173 "single molecule, or the selection can be done interactively.[PAR]",
1175 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1176 "If any of the occupancies are not one, indicating that the atom is",
1177 "not resolved well in the structure, a warning message is issued.",
1178 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1179 "all occupancy fields may be zero. Either way, it is up to the user",
1180 "to verify the correctness of the input data (read the article!).[PAR]",
1182 "During processing the atoms will be reordered according to GROMACS",
1183 "conventions. With [TT]-n[tt] an index file can be generated that",
1184 "contains one group reordered in the same way. This allows you to",
1185 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1186 "one limitation: reordering is done after the hydrogens are stripped",
1187 "from the input and before new hydrogens are added. This means that",
1188 "you should not use [TT]-ignh[tt].[PAR]",
1190 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1191 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1192 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1195 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1196 "motions. Angular and out-of-plane motions can be removed by changing",
1197 "hydrogens into virtual sites and fixing angles, which fixes their",
1198 "position relative to neighboring atoms. Additionally, all atoms in the",
1199 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1200 "can be converted into virtual sites, eliminating the fast improper dihedral",
1201 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1202 "atoms are also converted to virtual sites. The mass of all atoms that are",
1203 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1204 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1205 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1206 "done for water hydrogens to slow down the rotational motion of water.",
1207 "The increase in mass of the hydrogens is subtracted from the bonded",
1208 "(heavy) atom so that the total mass of the system remains the same."
1212 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1214 t_atoms pdba_all, *pdba;
1218 int chain, nch, maxch, nwaterchain;
1220 t_chain *chains, *cc;
1221 char select[STRLEN];
1229 int i, j, k, l, nrtp;
1230 int *swap_index, si;
1234 gpp_atomtype_t atype;
1235 gmx_residuetype_t*rt;
1237 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1238 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1239 char *c, forcefield[STRLEN], ffdir[STRLEN];
1240 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1248 int nrtprename, naa;
1249 rtprename_t *rtprename = NULL;
1250 int nah, nNtdb, nCtdb, ntdblist;
1251 t_hackblock *ntdb, *ctdb, **tdblist;
1255 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1257 t_hackblock *hb_chain;
1258 t_restp *restp_chain;
1260 const char *p_restype;
1264 const char * prev_atomname;
1265 const char * this_atomname;
1266 const char * prev_resname;
1267 const char * this_resname;
1272 int prev_chainnumber;
1273 int this_chainnumber;
1275 int this_chainstart;
1276 int prev_chainstart;
1283 { efSTX, "-f", "eiwit.pdb", ffREAD },
1284 { efSTO, "-o", "conf", ffWRITE },
1285 { efTOP, NULL, NULL, ffWRITE },
1286 { efITP, "-i", "posre", ffWRITE },
1287 { efNDX, "-n", "clean", ffOPTWR },
1288 { efSTO, "-q", "clean.pdb", ffOPTWR }
1290 #define NFILE asize(fnm)
1293 /* Command line arguments must be static */
1294 static gmx_bool bNewRTP = FALSE;
1295 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1296 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1297 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1298 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1299 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1300 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1301 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1302 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1303 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1304 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1305 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1306 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1307 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1308 static const char *ff = "select";
1311 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1312 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1313 { "-lb", FALSE, etREAL, {&long_bond_dist},
1314 "HIDDENLong bond warning distance" },
1315 { "-sb", FALSE, etREAL, {&short_bond_dist},
1316 "HIDDENShort bond warning distance" },
1317 { "-chainsep", FALSE, etENUM, {chainsep},
1318 "Condition in PDB files when a new chain should be started (adding termini)" },
1319 { "-merge", FALSE, etENUM, {&merge},
1320 "Merge multiple chains into a single [moleculetype]" },
1321 { "-ff", FALSE, etSTR, {&ff},
1322 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1323 { "-water", FALSE, etENUM, {watstr},
1324 "Water model to use" },
1325 { "-inter", FALSE, etBOOL, {&bInter},
1326 "Set the next 8 options to interactive"},
1327 { "-ss", FALSE, etBOOL, {&bCysMan},
1328 "Interactive SS bridge selection" },
1329 { "-ter", FALSE, etBOOL, {&bTerMan},
1330 "Interactive termini selection, instead of charged (default)" },
1331 { "-lys", FALSE, etBOOL, {&bLysMan},
1332 "Interactive lysine selection, instead of charged" },
1333 { "-arg", FALSE, etBOOL, {&bArgMan},
1334 "Interactive arginine selection, instead of charged" },
1335 { "-asp", FALSE, etBOOL, {&bAspMan},
1336 "Interactive aspartic acid selection, instead of charged" },
1337 { "-glu", FALSE, etBOOL, {&bGluMan},
1338 "Interactive glutamic acid selection, instead of charged" },
1339 { "-gln", FALSE, etBOOL, {&bGlnMan},
1340 "Interactive glutamine selection, instead of neutral" },
1341 { "-his", FALSE, etBOOL, {&bHisMan},
1342 "Interactive histidine selection, instead of checking H-bonds" },
1343 { "-angle", FALSE, etREAL, {&angle},
1344 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1345 { "-dist", FALSE, etREAL, {&distance},
1346 "Maximum donor-acceptor distance for a H-bond (nm)" },
1347 { "-una", FALSE, etBOOL, {&bUnA},
1348 "Select aromatic rings with united CH atoms on phenylalanine, "
1349 "tryptophane and tyrosine" },
1350 { "-sort", FALSE, etBOOL, {&bSort},
1351 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1352 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1353 "Ignore hydrogen atoms that are in the coordinate file" },
1354 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1355 "Continue when atoms are missing, dangerous" },
1356 { "-v", FALSE, etBOOL, {&bVerbose},
1357 "Be slightly more verbose in messages" },
1358 { "-posrefc", FALSE, etREAL, {&posre_fc},
1359 "Force constant for position restraints" },
1360 { "-vsite", FALSE, etENUM, {vsitestr},
1361 "Convert atoms to virtual sites" },
1362 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1363 "Make hydrogen atoms heavy" },
1364 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1365 "Change the mass of hydrogens to 2 amu" },
1366 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1367 "Use charge groups in the [TT].rtp[tt] file" },
1368 { "-cmap", TRUE, etBOOL, {&bCmap},
1369 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1370 { "-renum", TRUE, etBOOL, {&bRenumRes},
1371 "Renumber the residues consecutively in the output" },
1372 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1373 "Use [TT].rtp[tt] entry names as residue names" }
1375 #define NPARGS asize(pa)
1377 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1383 /* Force field selection, interactive or direct */
1384 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1385 forcefield, sizeof(forcefield),
1386 ffdir, sizeof(ffdir));
1388 if (strlen(forcefield) > 0)
1390 strcpy(ffname, forcefield);
1391 ffname[0] = toupper(ffname[0]);
1395 gmx_fatal(FARGS, "Empty forcefield string");
1398 printf("\nUsing the %s force field in directory %s\n\n",
1401 choose_watermodel(watstr[0], ffdir, &watermodel);
1405 /* if anything changes here, also change description of -inter */
1420 else if (bDeuterate)
1429 switch (vsitestr[0][0])
1431 case 'n': /* none */
1433 bVsiteAromatics = FALSE;
1435 case 'h': /* hydrogens */
1437 bVsiteAromatics = FALSE;
1439 case 'a': /* aromatics */
1441 bVsiteAromatics = TRUE;
1444 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1445 __FILE__, __LINE__, vsitestr[0]);
1448 /* Open the symbol table */
1449 open_symtab(&symtab);
1451 /* Residue type database */
1452 gmx_residuetype_init(&rt);
1454 /* Read residue renaming database(s), if present */
1455 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1459 for (i = 0; i < nrrn; i++)
1461 fp = fflib_open(rrn[i]);
1462 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1468 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1470 for (i = 0; i < nrtprename; i++)
1472 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1474 /* Only add names if the 'standard' gromacs/iupac base name was found */
1477 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1478 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1479 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1480 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1485 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1486 strstr(watermodel, "4P")))
1490 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1491 strstr(watermodel, "5P")))
1500 aps = gmx_atomprop_init();
1501 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1502 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1507 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1510 printf("Analyzing pdb file\n");
1515 modify_chain_numbers(&pdba_all, chainsep[0]);
1519 this_atomname = NULL;
1521 this_resname = NULL;
1524 this_chainnumber = -1;
1525 this_chainstart = 0;
1526 /* Keep the compiler happy */
1527 prev_chainstart = 0;
1532 for (i = 0; (i < natom); i++)
1534 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1536 prev_atomname = this_atomname;
1537 prev_atomnum = this_atomnum;
1538 prev_resname = this_resname;
1539 prev_resnum = this_resnum;
1540 prev_chainid = this_chainid;
1541 prev_chainnumber = this_chainnumber;
1544 prev_chainstart = this_chainstart;
1547 this_atomname = *pdba_all.atomname[i];
1548 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1549 this_resname = *ri->name;
1550 this_resnum = ri->nr;
1551 this_chainid = ri->chainid;
1552 this_chainnumber = ri->chainnum;
1554 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1555 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1557 this_chainstart = pdba_all.atom[i].resind;
1562 if (!strncmp(merge[0], "int", 3))
1564 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1565 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1566 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1567 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1569 if (NULL == fgets(select, STRLEN-1, stdin))
1571 gmx_fatal(FARGS, "Error reading from stdin");
1573 bMerged = (select[0] == 'y');
1575 else if (!strncmp(merge[0], "all", 3))
1583 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1584 pdba_all.atom[i].resind - prev_chainstart;
1585 pdb_ch[nch-1].nterpairs++;
1586 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1591 /* set natom for previous chain */
1594 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1601 /* check if chain identifier was used before */
1602 for (j = 0; (j < nch); j++)
1604 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1606 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1607 "They will be treated as separate chains unless you reorder your file.\n",
1614 srenew(pdb_ch, maxch);
1616 pdb_ch[nch].chainid = ri->chainid;
1617 pdb_ch[nch].chainnum = ri->chainnum;
1618 pdb_ch[nch].start = i;
1619 pdb_ch[nch].bAllWat = bWat;
1622 pdb_ch[nch].nterpairs = 0;
1626 pdb_ch[nch].nterpairs = 1;
1628 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1629 /* modified [nch] to [0] below */
1630 pdb_ch[nch].chainstart[0] = 0;
1636 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1638 /* set all the water blocks at the end of the chain */
1639 snew(swap_index, nch);
1641 for (i = 0; i < nch; i++)
1643 if (!pdb_ch[i].bAllWat)
1649 for (i = 0; i < nch; i++)
1651 if (pdb_ch[i].bAllWat)
1657 if (nwaterchain > 1)
1659 printf("Moved all the water blocks to the end\n");
1663 /* copy pdb data and x for all chains */
1664 for (i = 0; (i < nch); i++)
1667 chains[i].chainid = pdb_ch[si].chainid;
1668 chains[i].chainnum = pdb_ch[si].chainnum;
1669 chains[i].bAllWat = pdb_ch[si].bAllWat;
1670 chains[i].nterpairs = pdb_ch[si].nterpairs;
1671 chains[i].chainstart = pdb_ch[si].chainstart;
1672 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1673 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1674 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1675 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1677 snew(chains[i].pdba, 1);
1678 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1679 snew(chains[i].x, chains[i].pdba->nr);
1680 for (j = 0; j < chains[i].pdba->nr; j++)
1682 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1683 snew(chains[i].pdba->atomname[j], 1);
1684 *chains[i].pdba->atomname[j] =
1685 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1686 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1687 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1689 /* Re-index the residues assuming that the indices are continuous */
1690 k = chains[i].pdba->atom[0].resind;
1691 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1692 chains[i].pdba->nres = nres;
1693 for (j = 0; j < chains[i].pdba->nr; j++)
1695 chains[i].pdba->atom[j].resind -= k;
1697 srenew(chains[i].pdba->resinfo, nres);
1698 for (j = 0; j < nres; j++)
1700 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1701 snew(chains[i].pdba->resinfo[j].name, 1);
1702 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1703 /* make all chain identifiers equal to that of the chain */
1704 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1708 if (nchainmerges > 0)
1710 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1714 printf("There are %d chains and %d blocks of water and "
1715 "%d residues with %d atoms\n",
1716 nch-nwaterchain, nwaterchain,
1717 pdba_all.nres, natom);
1719 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1720 for (i = 0; (i < nch); i++)
1722 printf(" %d '%c' %5d %6d %s\n",
1723 i+1, chains[i].chainid ? chains[i].chainid : '-',
1724 chains[i].pdba->nres, chains[i].pdba->nr,
1725 chains[i].bAllWat ? "(only water)" : "");
1729 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1731 /* Read atomtypes... */
1732 atype = read_atype(ffdir, &symtab);
1734 /* read residue database */
1735 printf("Reading residue database... (%s)\n", forcefield);
1736 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1739 for (i = 0; i < nrtpf; i++)
1741 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1747 /* Not correct with multiple rtp input files with different bonded types */
1748 fp = gmx_fio_fopen("new.rtp", "w");
1749 print_resall(fp, nrtp, restp, atype);
1753 /* read hydrogen database */
1754 nah = read_h_db(ffdir, &ah);
1756 /* Read Termini database... */
1757 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1758 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1760 top_fn = ftp2fn(efTOP, NFILE, fnm);
1761 top_file = gmx_fio_fopen(top_fn, "w");
1763 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1770 for (chain = 0; (chain < nch); chain++)
1772 cc = &(chains[chain]);
1774 /* set pdba, natom and nres to the current chain */
1777 natom = cc->pdba->nr;
1778 nres = cc->pdba->nres;
1780 if (cc->chainid && ( cc->chainid != ' ' ) )
1782 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1783 chain+1, cc->chainid, natom, nres);
1787 printf("Processing chain %d (%d atoms, %d residues)\n",
1788 chain+1, natom, nres);
1791 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1792 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1793 nrtprename, rtprename);
1795 cc->chainstart[cc->nterpairs] = pdba->nres;
1797 for (i = 0; i < cc->nterpairs; i++)
1799 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1800 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1802 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1808 if (cc->nterpairs == 0)
1810 printf("Problem with chain definition, or missing terminal residues.\n"
1811 "This chain does not appear to contain a recognized chain molecule.\n"
1812 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1815 /* Check for disulfides and other special bonds */
1816 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1820 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1828 sprintf(fn, "chain.pdb");
1832 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1834 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1838 for (i = 0; i < cc->nterpairs; i++)
1842 * We first apply a filter so we only have the
1843 * termini that can be applied to the residue in question
1844 * (or a generic terminus if no-residue specific is available).
1846 /* First the N terminus */
1849 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1850 *pdba->resinfo[cc->r_start[i]].name,
1851 *pdba->resinfo[cc->r_start[i]].rtp,
1855 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1856 "is already in a terminus-specific form and skipping terminus selection.\n");
1861 if (bTerMan && ntdblist > 1)
1863 sprintf(select, "Select start terminus type for %s-%d",
1864 *pdba->resinfo[cc->r_start[i]].name,
1865 pdba->resinfo[cc->r_start[i]].nr);
1866 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1870 cc->ntdb[i] = tdblist[0];
1873 printf("Start terminus %s-%d: %s\n",
1874 *pdba->resinfo[cc->r_start[i]].name,
1875 pdba->resinfo[cc->r_start[i]].nr,
1876 (cc->ntdb[i])->name);
1885 /* And the C terminus */
1888 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1889 *pdba->resinfo[cc->r_end[i]].name,
1890 *pdba->resinfo[cc->r_end[i]].rtp,
1894 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1895 "is already in a terminus-specific form and skipping terminus selection.\n");
1900 if (bTerMan && ntdblist > 1)
1902 sprintf(select, "Select end terminus type for %s-%d",
1903 *pdba->resinfo[cc->r_end[i]].name,
1904 pdba->resinfo[cc->r_end[i]].nr);
1905 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1909 cc->ctdb[i] = tdblist[0];
1911 printf("End terminus %s-%d: %s\n",
1912 *pdba->resinfo[cc->r_end[i]].name,
1913 pdba->resinfo[cc->r_end[i]].nr,
1914 (cc->ctdb[i])->name);
1923 /* lookup hackblocks and rtp for all residues */
1924 get_hackblocks_rtp(&hb_chain, &restp_chain,
1925 nrtp, restp, pdba->nres, pdba->resinfo,
1926 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1927 /* ideally, now we would not need the rtp itself anymore, but do
1928 everything using the hb and restp arrays. Unfortunately, that
1929 requires some re-thinking of code in gen_vsite.c, which I won't
1930 do now :( AF 26-7-99 */
1932 rename_atoms(NULL, ffdir,
1933 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1935 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1939 block = new_blocka();
1941 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1942 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1943 if (ftp2bSet(efNDX, NFILE, fnm))
1947 fprintf(stderr, "WARNING: with the -remh option the generated "
1948 "index file (%s) might be useless\n"
1949 "(the index file is generated before hydrogens are added)",
1950 ftp2fn(efNDX, NFILE, fnm));
1952 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1954 for (i = 0; i < block->nr; i++)
1963 fprintf(stderr, "WARNING: "
1964 "without sorting no check for duplicate atoms can be done\n");
1967 /* Generate Hydrogen atoms (and termini) in the sequence */
1968 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1969 natom = add_h(&pdba, &x, nah, ah,
1970 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1971 NULL, NULL, TRUE, FALSE);
1972 printf("Now there are %d residues with %d atoms\n",
1973 pdba->nres, pdba->nr);
1976 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1981 for (i = 0; (i < natom); i++)
1983 fprintf(debug, "Res %s%d atom %d %s\n",
1984 *(pdba->resinfo[pdba->atom[i].resind].name),
1985 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1989 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1991 /* make up molecule name(s) */
1993 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1995 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2001 sprintf(molname, "Water");
2005 this_chainid = cc->chainid;
2007 /* Add the chain id if we have one */
2008 if (this_chainid != ' ')
2010 sprintf(buf, "_chain_%c", this_chainid);
2011 strcat(suffix, buf);
2014 /* Check if there have been previous chains with the same id */
2016 for (k = 0; k < chain; k++)
2018 if (cc->chainid == chains[k].chainid)
2023 /* Add the number for this chain identifier if there are multiple copies */
2027 sprintf(buf, "%d", nid_used+1);
2028 strcat(suffix, buf);
2031 if (strlen(suffix) > 0)
2033 sprintf(molname, "%s%s", p_restype, suffix);
2037 strcpy(molname, p_restype);
2041 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2044 strcpy(itp_fn, top_fn);
2045 printf("Chain time...\n");
2046 c = strrchr(itp_fn, '.');
2047 sprintf(c, "_%s.itp", molname);
2048 c = strrchr(posre_fn, '.');
2049 sprintf(c, "_%s.itp", molname);
2050 if (strcmp(itp_fn, posre_fn) == 0)
2052 strcpy(buf_fn, posre_fn);
2053 c = strrchr(buf_fn, '.');
2055 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2059 srenew(incls, nincl);
2060 incls[nincl-1] = gmx_strdup(itp_fn);
2061 itp_file = gmx_fio_fopen(itp_fn, "w");
2068 srenew(mols, nmol+1);
2071 mols[nmol].name = gmx_strdup("SOL");
2072 mols[nmol].nr = pdba->nres;
2076 mols[nmol].name = gmx_strdup(molname);
2083 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2093 top_file2 = itp_file;
2097 top_file2 = top_file;
2100 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2102 restp_chain, hb_chain,
2104 bVsites, bVsiteAromatics, ffdir,
2105 mHmult, nssbonds, ssbonds,
2106 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2107 bRenumRes, bRTPresname);
2111 write_posres(posre_fn, pdba, posre_fc);
2116 gmx_fio_fclose(itp_file);
2119 /* pdba and natom have been reassigned somewhere so: */
2125 if (cc->chainid == ' ')
2127 sprintf(fn, "chain.pdb");
2131 sprintf(fn, "chain_%c.pdb", cc->chainid);
2133 cool_quote(quote, 255, NULL);
2134 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2138 if (watermodel == NULL)
2140 for (chain = 0; chain < nch; chain++)
2142 if (chains[chain].bAllWat)
2144 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2150 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2151 if (!fflib_fexist(buf_fn))
2153 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2154 buf_fn, watermodel);
2158 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2159 gmx_fio_fclose(top_file);
2161 gmx_residuetype_destroy(rt);
2163 /* now merge all chains back together */
2166 for (i = 0; (i < nch); i++)
2168 natom += chains[i].pdba->nr;
2169 nres += chains[i].pdba->nres;
2172 init_t_atoms(atoms, natom, FALSE);
2173 for (i = 0; i < atoms->nres; i++)
2175 sfree(atoms->resinfo[i].name);
2177 sfree(atoms->resinfo);
2179 snew(atoms->resinfo, nres);
2183 for (i = 0; (i < nch); i++)
2187 printf("Including chain %d in system: %d atoms %d residues\n",
2188 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2190 for (j = 0; (j < chains[i].pdba->nr); j++)
2192 atoms->atom[k] = chains[i].pdba->atom[j];
2193 atoms->atom[k].resind += l; /* l is processed nr of residues */
2194 atoms->atomname[k] = chains[i].pdba->atomname[j];
2195 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2196 copy_rvec(chains[i].x[j], x[k]);
2199 for (j = 0; (j < chains[i].pdba->nres); j++)
2201 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2204 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2212 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2213 print_sums(atoms, TRUE);
2216 fprintf(stderr, "\nWriting coordinate file...\n");
2217 clear_rvec(box_space);
2220 make_new_box(atoms->nr, x, box, box_space, FALSE);
2222 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2224 printf("\t\t--------- PLEASE NOTE ------------\n");
2225 printf("You have successfully generated a topology from: %s.\n",
2226 opt2fn("-f", NFILE, fnm));
2227 if (watermodel != NULL)
2229 printf("The %s force field and the %s water model are used.\n",
2230 ffname, watermodel);
2234 printf("The %s force field is used.\n",
2237 printf("\t\t--------- ETON ESAELP ------------\n");