2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
47 #include "gromacs/fileio/gmxfio.h"
51 #include "gromacs/fileio/confio.h"
54 #include "gromacs/commandline/pargs.h"
55 #include "gromacs/fileio/futil.h"
56 #include "gmx_fatal.h"
57 #include "gromacs/fileio/pdbio.h"
66 #include "gromacs/gmxlib/conformation-utilities.h"
71 #include "fflibutil.h"
74 #include "gromacs/fileio/strdb.h"
89 static const char *res2bb_notermini(const char *name,
90 int nrr, const rtprename_t *rr)
92 /* NOTE: This function returns the main building block name,
93 * it does not take terminal renaming into account.
98 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
103 return (i < nrr ? rr[i].main : name);
106 static const char *select_res(int nr, int resnr,
107 const char *name[], const char *expl[],
109 int nrr, const rtprename_t *rr)
113 printf("Which %s type do you want for residue %d\n", title, resnr+1);
114 for (sel = 0; (sel < nr); sel++)
116 printf("%d. %s (%s)\n",
117 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
119 printf("\nType a number:"); fflush(stdout);
121 if (scanf("%d", &sel) != 1)
123 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
129 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
134 const char *lh[easpNR] = { "ASP", "ASPH" };
135 const char *expl[easpNR] = {
136 "Not protonated (charge -1)",
137 "Protonated (charge 0)"
140 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
143 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
148 const char *lh[egluNR] = { "GLU", "GLUH" };
149 const char *expl[egluNR] = {
150 "Not protonated (charge -1)",
151 "Protonated (charge 0)"
154 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
157 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
162 const char *lh[eglnNR] = { "GLN", "QLN" };
163 const char *expl[eglnNR] = {
164 "Not protonated (charge 0)",
165 "Protonated (charge +1)"
168 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
171 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
176 const char *lh[elysNR] = { "LYSN", "LYS" };
177 const char *expl[elysNR] = {
178 "Not protonated (charge 0)",
179 "Protonated (charge +1)"
182 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
185 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
190 const char *lh[eargNR] = { "ARGN", "ARG" };
191 const char *expl[eargNR] = {
192 "Not protonated (charge 0)",
193 "Protonated (charge +1)"
196 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
199 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
201 const char *expl[ehisNR] = {
208 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
211 static void read_rtprename(const char *fname, FILE *fp,
212 int *nrtprename, rtprename_t **rtprename)
214 char line[STRLEN], buf[STRLEN];
223 while (get_a_line(fp, line, STRLEN))
226 nc = sscanf(line, "%s %s %s %s %s %s",
227 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
230 if (nc != 2 && nc != 5)
232 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
238 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
243 /* This file does not have special termini names, copy them from main */
244 strcpy(rr[n].nter, rr[n].main);
245 strcpy(rr[n].cter, rr[n].main);
246 strcpy(rr[n].bter, rr[n].main);
256 static char *search_resrename(int nrr, rtprename_t *rr,
258 gmx_bool bStart, gmx_bool bEnd,
259 gmx_bool bCompareFFRTPname)
267 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
268 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
273 /* If found in the database, rename this residue's rtp building block,
274 * otherwise keep the old name.
297 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
305 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
306 int nrr, rtprename_t *rr, t_symtab *symtab,
310 gmx_bool bStart, bEnd;
312 gmx_bool bFFRTPTERRNM;
314 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
316 for (r = 0; r < pdba->nres; r++)
320 for (j = 0; j < nterpairs; j++)
327 for (j = 0; j < nterpairs; j++)
335 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
337 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
339 /* This is a terminal residue, but the residue name,
340 * currently stored in .rtp, is not a standard residue name,
341 * but probably a force field specific rtp name.
342 * Check if we need to rename it because it is terminal.
344 nn = search_resrename(nrr, rr,
345 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
348 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
352 printf("Changing rtp entry of residue %d %s to '%s'\n",
353 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
355 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
360 static void pdbres_to_gmxrtp(t_atoms *pdba)
364 for (i = 0; (i < pdba->nres); i++)
366 if (pdba->resinfo[i].rtp == NULL)
368 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
373 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
374 gmx_bool bFullCompare, t_symtab *symtab)
379 for (i = 0; (i < pdba->nres); i++)
381 resnm = *pdba->resinfo[i].name;
382 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
383 (!bFullCompare && strstr(resnm, oldnm) != NULL))
385 /* Rename the residue name (not the rtp name) */
386 pdba->resinfo[i].name = put_symtab(symtab, newnm);
391 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
392 gmx_bool bFullCompare, t_symtab *symtab)
397 for (i = 0; (i < pdba->nres); i++)
399 /* We have not set the rtp name yes, use the residue name */
400 bbnm = *pdba->resinfo[i].name;
401 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
402 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
404 /* Change the rtp builing block name */
405 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
410 static void rename_bbint(t_atoms *pdba, const char *oldnm,
411 const char *gettp(int, int, const rtprename_t *),
412 gmx_bool bFullCompare,
414 int nrr, const rtprename_t *rr)
420 for (i = 0; i < pdba->nres; i++)
422 /* We have not set the rtp name yes, use the residue name */
423 bbnm = *pdba->resinfo[i].name;
424 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
425 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
427 ptr = gettp(i, nrr, rr);
428 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
433 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
439 ftp = fn2ftp(filename);
440 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
442 fprintf(stderr, "No occupancies in %s\n", filename);
446 for (i = 0; (i < atoms->nr); i++)
448 if (atoms->pdbinfo[i].occup != 1)
452 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
453 *atoms->resinfo[atoms->atom[i].resind].name,
454 atoms->resinfo[ atoms->atom[i].resind].nr,
456 atoms->pdbinfo[i].occup);
458 if (atoms->pdbinfo[i].occup == 0)
468 if (nzero == atoms->nr)
470 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
472 else if ((nzero > 0) || (nnotone > 0))
476 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
477 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
479 nzero, nnotone, atoms->nr);
483 fprintf(stderr, "All occupancies are one\n");
488 void write_posres(char *fn, t_atoms *pdba, real fc)
493 fp = gmx_fio_fopen(fn, "w");
495 "; In this topology include file, you will find position restraint\n"
496 "; entries for all the heavy atoms in your original pdb file.\n"
497 "; This means that all the protons which were added by pdb2gmx are\n"
498 "; not restrained.\n"
500 "[ position_restraints ]\n"
501 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
503 for (i = 0; (i < pdba->nr); i++)
505 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
507 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
513 static int read_pdball(const char *inf, const char *outf, char *title,
514 t_atoms *atoms, rvec **x,
515 int *ePBC, matrix box, gmx_bool bRemoveH,
516 t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
517 gmx_atomprop_t aps, gmx_bool bVerbose)
518 /* Read a pdb file. (containing proteins) */
520 int natom, new_natom, i;
523 printf("Reading %s...\n", inf);
524 get_stx_coordnum(inf, &natom);
525 init_t_atoms(atoms, natom, TRUE);
527 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
528 if (fn2ftp(inf) == efPDB)
530 get_pdb_atomnumber(atoms, aps);
535 for (i = 0; i < atoms->nr; i++)
537 if (!is_hydrogen(*atoms->atomname[i]))
539 atoms->atom[new_natom] = atoms->atom[i];
540 atoms->atomname[new_natom] = atoms->atomname[i];
541 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
542 copy_rvec((*x)[i], (*x)[new_natom]);
546 atoms->nr = new_natom;
551 if (title && title[0])
553 printf(" '%s',", title);
555 printf(" %d atoms\n", natom);
557 /* Rename residues */
558 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
559 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
560 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
562 rename_atoms("xlateat.dat", NULL,
563 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
572 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
578 void process_chain(t_atoms *pdba, rvec *x,
579 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
580 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
581 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
582 real angle, real distance, t_symtab *symtab,
583 int nrr, const rtprename_t *rr)
585 /* Rename aromatics, lys, asp and histidine */
588 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
592 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
596 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
600 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
604 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
608 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
612 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
616 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
620 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
624 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
629 set_histp(pdba, x, angle, distance);
633 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
636 /* Initialize the rtp builing block names with the residue names
637 * for the residues that have not been processed above.
639 pdbres_to_gmxrtp(pdba);
641 /* Now we have all rtp names set.
642 * The rtp names will conform to Gromacs naming,
643 * unless the input pdb file contained one or more force field specific
644 * rtp names as residue names.
648 /* struct for sorting the atoms from the pdb file */
650 int resnr; /* residue number */
651 int j; /* database order index */
652 int index; /* original atom number */
653 char anm1; /* second letter of atom name */
654 char altloc; /* alternate location indicator */
657 int pdbicomp(const void *a, const void *b)
662 pa = (t_pdbindex *)a;
663 pb = (t_pdbindex *)b;
665 d = (pa->resnr - pb->resnr);
671 d = (pa->anm1 - pb->anm1);
674 d = (pa->altloc - pb->altloc);
682 static void sort_pdbatoms(t_restp restp[],
683 int natoms, t_atoms **pdbaptr, rvec **x,
684 t_blocka *block, char ***gnames)
686 t_atoms *pdba, *pdbnew;
701 for (i = 0; i < natoms; i++)
703 atomnm = *pdba->atomname[i];
704 rptr = &restp[pdba->atom[i].resind];
705 for (j = 0; (j < rptr->natom); j++)
707 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
712 if (j == rptr->natom)
717 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
718 "while sorting atoms.\n%s", atomnm,
719 *pdba->resinfo[pdba->atom[i].resind].name,
720 pdba->resinfo[pdba->atom[i].resind].nr,
723 is_hydrogen(atomnm) ?
724 "\nFor a hydrogen, this can be a different protonation state, or it\n"
725 "might have had a different number in the PDB file and was rebuilt\n"
726 "(it might for instance have been H3, and we only expected H1 & H2).\n"
727 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
728 "Remove this hydrogen or choose a different protonation state to solve it.\n"
729 "Option -ignh will ignore all hydrogens in the input." : ".");
730 gmx_fatal(FARGS, buf);
732 /* make shadow array to be sorted into indexgroup */
733 pdbi[i].resnr = pdba->atom[i].resind;
736 pdbi[i].anm1 = atomnm[1];
737 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
739 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
741 /* pdba is sorted in pdbnew using the pdbi index */
744 init_t_atoms(pdbnew, natoms, TRUE);
746 pdbnew->nr = pdba->nr;
747 pdbnew->nres = pdba->nres;
748 sfree(pdbnew->resinfo);
749 pdbnew->resinfo = pdba->resinfo;
750 for (i = 0; i < natoms; i++)
752 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
753 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
754 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
755 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
756 /* make indexgroup in block */
757 a[i] = pdbi[i].index;
760 sfree(pdba->atomname);
762 sfree(pdba->pdbinfo);
765 /* copy the sorted pdbnew back to pdba */
768 add_grp(block, gnames, natoms, a, "prot_sort");
774 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
776 int i, j, oldnatoms, ndel;
779 printf("Checking for duplicate atoms....\n");
780 oldnatoms = pdba->nr;
782 /* NOTE: pdba->nr is modified inside the loop */
783 for (i = 1; (i < pdba->nr); i++)
785 /* compare 'i' and 'i-1', throw away 'i' if they are identical
786 this is a 'while' because multiple alternate locations can be present */
787 while ( (i < pdba->nr) &&
788 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
789 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
794 ri = &pdba->resinfo[pdba->atom[i].resind];
795 printf("deleting duplicate atom %4s %s%4d%c",
796 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
797 if (ri->chainid && (ri->chainid != ' '))
799 printf(" ch %c", ri->chainid);
803 if (pdba->pdbinfo[i].atomnr)
805 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
807 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
809 printf(" altloc %c", pdba->pdbinfo[i].altloc);
815 /* We can not free, since it might be in the symtab */
816 /* sfree(pdba->atomname[i]); */
817 for (j = i; j < pdba->nr; j++)
819 pdba->atom[j] = pdba->atom[j+1];
820 pdba->atomname[j] = pdba->atomname[j+1];
821 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
822 copy_rvec(x[j+1], x[j]);
824 srenew(pdba->atom, pdba->nr);
825 /* srenew(pdba->atomname, pdba->nr); */
826 srenew(pdba->pdbinfo, pdba->nr);
829 if (pdba->nr != oldnatoms)
831 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
837 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
840 const char *p_startrestype;
841 const char *p_restype;
842 int nstartwarn, nendwarn;
850 /* Find the starting terminus (typially N or 5') */
851 for (i = r0; i < r1 && *r_start == -1; i++)
853 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
854 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
856 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
863 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
867 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
875 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
876 for (i = *r_start; i < r1; i++)
878 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
879 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
887 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
888 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
889 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
893 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
902 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
908 modify_chain_numbers(t_atoms * pdba,
909 const char * chainsep)
912 char old_prev_chainid;
913 char old_this_chainid;
914 int old_prev_chainnum;
915 int old_this_chainnum;
921 const char * prev_atomname;
922 const char * this_atomname;
923 const char * prev_resname;
924 const char * this_resname;
929 int prev_chainnumber;
930 int this_chainnumber;
942 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
944 /* Be a bit flexible to catch typos */
945 if (!strncmp(chainsep, "id_o", 4))
947 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
948 splitting = SPLIT_ID_OR_TER;
949 printf("Splitting chemical chains based on TER records or chain id changing.\n");
951 else if (!strncmp(chainsep, "int", 3))
953 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
954 splitting = SPLIT_INTERACTIVE;
955 printf("Splitting chemical chains interactively.\n");
957 else if (!strncmp(chainsep, "id_a", 4))
959 splitting = SPLIT_ID_AND_TER;
960 printf("Splitting chemical chains based on TER records and chain id changing.\n");
962 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
964 splitting = SPLIT_ID_ONLY;
965 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
967 else if (chainsep[0] == 't')
969 splitting = SPLIT_TER_ONLY;
970 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
974 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
977 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
979 old_prev_chainid = '?';
980 old_prev_chainnum = -1;
983 this_atomname = NULL;
988 this_chainnumber = -1;
990 for (i = 0; i < pdba->nres; i++)
992 ri = &pdba->resinfo[i];
993 old_this_chainid = ri->chainid;
994 old_this_chainnum = ri->chainnum;
996 prev_atomname = this_atomname;
997 prev_atomnum = this_atomnum;
998 prev_resname = this_resname;
999 prev_resnum = this_resnum;
1000 prev_chainid = this_chainid;
1001 prev_chainnumber = this_chainnumber;
1003 this_atomname = *(pdba->atomname[i]);
1004 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1005 this_resname = *ri->name;
1006 this_resnum = ri->nr;
1007 this_chainid = ri->chainid;
1008 this_chainnumber = ri->chainnum;
1012 case SPLIT_ID_OR_TER:
1013 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1019 case SPLIT_ID_AND_TER:
1020 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1027 if (old_this_chainid != old_prev_chainid)
1033 case SPLIT_TER_ONLY:
1034 if (old_this_chainnum != old_prev_chainnum)
1039 case SPLIT_INTERACTIVE:
1040 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1044 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1045 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1046 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1047 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1049 if (NULL == fgets(select, STRLEN-1, stdin))
1051 gmx_fatal(FARGS, "Error reading from stdin");
1054 if (i == 0 || select[0] == 'y')
1061 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1064 old_prev_chainid = old_this_chainid;
1065 old_prev_chainnum = old_this_chainnum;
1067 ri->chainnum = new_chainnum;
1096 int gmx_pdb2gmx(int argc, char *argv[])
1098 const char *desc[] = {
1099 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1100 "some database files, adds hydrogens to the molecules and generates",
1101 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1102 "and a topology in GROMACS format.",
1103 "These files can subsequently be processed to generate a run input file.",
1105 "[THISMODULE] will search for force fields by looking for",
1106 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1107 "of the current working directory and of the GROMACS library directory",
1108 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1110 "By default the forcefield selection is interactive,",
1111 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1112 "in the list on the command line instead. In that case [THISMODULE] just looks",
1113 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1115 "After choosing a force field, all files will be read only from",
1116 "the corresponding force field directory.",
1117 "If you want to modify or add a residue types, you can copy the force",
1118 "field directory from the GROMACS library directory to your current",
1119 "working directory. If you want to add new protein residue types,",
1120 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1121 "or copy the whole library directory to a local directory and set",
1122 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1123 "Check Chapter 5 of the manual for more information about file formats.",
1126 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1127 "need not necessarily contain a protein structure. Every kind of",
1128 "molecule for which there is support in the database can be converted.",
1129 "If there is no support in the database, you can add it yourself.[PAR]",
1131 "The program has limited intelligence, it reads a number of database",
1132 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1133 "if necessary this can be done manually. The program can prompt the",
1134 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1135 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1136 "protonated (three protons, default), for Asp and Glu unprotonated",
1137 "(default) or protonated, for His the proton can be either on ND1,",
1138 "on NE2 or on both. By default these selections are done automatically.",
1139 "For His, this is based on an optimal hydrogen bonding",
1140 "conformation. Hydrogen bonds are defined based on a simple geometric",
1141 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1142 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1143 "[TT]-dist[tt] respectively.[PAR]",
1145 "The protonation state of N- and C-termini can be chosen interactively",
1146 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1147 "respectively. Some force fields support zwitterionic forms for chains of",
1148 "one residue, but for polypeptides these options should NOT be selected.",
1149 "The AMBER force fields have unique forms for the terminal residues,",
1150 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1151 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1152 "respectively to use these forms, making sure you preserve the format",
1153 "of the coordinate file. Alternatively, use named terminating residues",
1154 "(e.g. ACE, NME).[PAR]",
1156 "The separation of chains is not entirely trivial since the markup",
1157 "in user-generated PDB files frequently varies and sometimes it",
1158 "is desirable to merge entries across a TER record, for instance",
1159 "if you want a disulfide bridge or distance restraints between",
1160 "two protein chains or if you have a HEME group bound to a protein.",
1161 "In such cases multiple chains should be contained in a single",
1162 "[TT]moleculetype[tt] definition.",
1163 "To handle this, [THISMODULE] uses two separate options.",
1164 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1165 "start, and termini added when applicable. This can be done based on the",
1166 "existence of TER records, when the chain id changes, or combinations of either",
1167 "or both of these. You can also do the selection fully interactively.",
1168 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1169 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1170 "This can be turned off (no merging), all non-water chains can be merged into a",
1171 "single molecule, or the selection can be done interactively.[PAR]",
1173 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1174 "If any of the occupancies are not one, indicating that the atom is",
1175 "not resolved well in the structure, a warning message is issued.",
1176 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1177 "all occupancy fields may be zero. Either way, it is up to the user",
1178 "to verify the correctness of the input data (read the article!).[PAR]",
1180 "During processing the atoms will be reordered according to GROMACS",
1181 "conventions. With [TT]-n[tt] an index file can be generated that",
1182 "contains one group reordered in the same way. This allows you to",
1183 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1184 "one limitation: reordering is done after the hydrogens are stripped",
1185 "from the input and before new hydrogens are added. This means that",
1186 "you should not use [TT]-ignh[tt].[PAR]",
1188 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1189 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1190 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1193 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1194 "motions. Angular and out-of-plane motions can be removed by changing",
1195 "hydrogens into virtual sites and fixing angles, which fixes their",
1196 "position relative to neighboring atoms. Additionally, all atoms in the",
1197 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1198 "can be converted into virtual sites, eliminating the fast improper dihedral",
1199 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1200 "atoms are also converted to virtual sites. The mass of all atoms that are",
1201 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1202 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1203 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1204 "done for water hydrogens to slow down the rotational motion of water.",
1205 "The increase in mass of the hydrogens is subtracted from the bonded",
1206 "(heavy) atom so that the total mass of the system remains the same."
1210 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1212 t_atoms pdba_all, *pdba;
1216 int chain, nch, maxch, nwaterchain;
1218 t_chain *chains, *cc;
1219 char select[STRLEN];
1227 int i, j, k, l, nrtp;
1228 int *swap_index, si;
1232 gpp_atomtype_t atype;
1233 gmx_residuetype_t rt;
1235 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1236 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1237 char *c, forcefield[STRLEN], ffdir[STRLEN];
1238 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1246 int nrtprename, naa;
1247 rtprename_t *rtprename = NULL;
1248 int nah, nNtdb, nCtdb, ntdblist;
1249 t_hackblock *ntdb, *ctdb, **tdblist;
1253 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1255 t_hackblock *hb_chain;
1256 t_restp *restp_chain;
1258 const char *p_restype;
1262 const char * prev_atomname;
1263 const char * this_atomname;
1264 const char * prev_resname;
1265 const char * this_resname;
1270 int prev_chainnumber;
1271 int this_chainnumber;
1273 int this_chainstart;
1274 int prev_chainstart;
1281 { efSTX, "-f", "eiwit.pdb", ffREAD },
1282 { efSTO, "-o", "conf", ffWRITE },
1283 { efTOP, NULL, NULL, ffWRITE },
1284 { efITP, "-i", "posre", ffWRITE },
1285 { efNDX, "-n", "clean", ffOPTWR },
1286 { efSTO, "-q", "clean.pdb", ffOPTWR }
1288 #define NFILE asize(fnm)
1291 /* Command line arguments must be static */
1292 static gmx_bool bNewRTP = FALSE;
1293 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1294 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1295 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1296 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1297 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1298 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1299 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1300 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1301 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1302 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1303 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1304 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1305 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1306 static const char *ff = "select";
1309 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1310 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1311 { "-lb", FALSE, etREAL, {&long_bond_dist},
1312 "HIDDENLong bond warning distance" },
1313 { "-sb", FALSE, etREAL, {&short_bond_dist},
1314 "HIDDENShort bond warning distance" },
1315 { "-chainsep", FALSE, etENUM, {chainsep},
1316 "Condition in PDB files when a new chain should be started (adding termini)" },
1317 { "-merge", FALSE, etENUM, {&merge},
1318 "Merge multiple chains into a single [moleculetype]" },
1319 { "-ff", FALSE, etSTR, {&ff},
1320 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1321 { "-water", FALSE, etENUM, {watstr},
1322 "Water model to use" },
1323 { "-inter", FALSE, etBOOL, {&bInter},
1324 "Set the next 8 options to interactive"},
1325 { "-ss", FALSE, etBOOL, {&bCysMan},
1326 "Interactive SS bridge selection" },
1327 { "-ter", FALSE, etBOOL, {&bTerMan},
1328 "Interactive termini selection, instead of charged (default)" },
1329 { "-lys", FALSE, etBOOL, {&bLysMan},
1330 "Interactive lysine selection, instead of charged" },
1331 { "-arg", FALSE, etBOOL, {&bArgMan},
1332 "Interactive arginine selection, instead of charged" },
1333 { "-asp", FALSE, etBOOL, {&bAspMan},
1334 "Interactive aspartic acid selection, instead of charged" },
1335 { "-glu", FALSE, etBOOL, {&bGluMan},
1336 "Interactive glutamic acid selection, instead of charged" },
1337 { "-gln", FALSE, etBOOL, {&bGlnMan},
1338 "Interactive glutamine selection, instead of neutral" },
1339 { "-his", FALSE, etBOOL, {&bHisMan},
1340 "Interactive histidine selection, instead of checking H-bonds" },
1341 { "-angle", FALSE, etREAL, {&angle},
1342 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1343 { "-dist", FALSE, etREAL, {&distance},
1344 "Maximum donor-acceptor distance for a H-bond (nm)" },
1345 { "-una", FALSE, etBOOL, {&bUnA},
1346 "Select aromatic rings with united CH atoms on phenylalanine, "
1347 "tryptophane and tyrosine" },
1348 { "-sort", FALSE, etBOOL, {&bSort},
1349 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1350 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1351 "Ignore hydrogen atoms that are in the coordinate file" },
1352 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1353 "Continue when atoms are missing, dangerous" },
1354 { "-v", FALSE, etBOOL, {&bVerbose},
1355 "Be slightly more verbose in messages" },
1356 { "-posrefc", FALSE, etREAL, {&posre_fc},
1357 "Force constant for position restraints" },
1358 { "-vsite", FALSE, etENUM, {vsitestr},
1359 "Convert atoms to virtual sites" },
1360 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1361 "Make hydrogen atoms heavy" },
1362 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1363 "Change the mass of hydrogens to 2 amu" },
1364 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1365 "Use charge groups in the [TT].rtp[tt] file" },
1366 { "-cmap", TRUE, etBOOL, {&bCmap},
1367 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1368 { "-renum", TRUE, etBOOL, {&bRenumRes},
1369 "Renumber the residues consecutively in the output" },
1370 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1371 "Use [TT].rtp[tt] entry names as residue names" }
1373 #define NPARGS asize(pa)
1375 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1381 /* Force field selection, interactive or direct */
1382 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1383 forcefield, sizeof(forcefield),
1384 ffdir, sizeof(ffdir));
1386 if (strlen(forcefield) > 0)
1388 strcpy(ffname, forcefield);
1389 ffname[0] = toupper(ffname[0]);
1393 gmx_fatal(FARGS, "Empty forcefield string");
1396 printf("\nUsing the %s force field in directory %s\n\n",
1399 choose_watermodel(watstr[0], ffdir, &watermodel);
1403 /* if anything changes here, also change description of -inter */
1418 else if (bDeuterate)
1427 switch (vsitestr[0][0])
1429 case 'n': /* none */
1431 bVsiteAromatics = FALSE;
1433 case 'h': /* hydrogens */
1435 bVsiteAromatics = FALSE;
1437 case 'a': /* aromatics */
1439 bVsiteAromatics = TRUE;
1442 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1443 __FILE__, __LINE__, vsitestr[0]);
1446 /* Open the symbol table */
1447 open_symtab(&symtab);
1449 /* Residue type database */
1450 gmx_residuetype_init(&rt);
1452 /* Read residue renaming database(s), if present */
1453 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1457 for (i = 0; i < nrrn; i++)
1459 fp = fflib_open(rrn[i]);
1460 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1466 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1468 for (i = 0; i < nrtprename; i++)
1470 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1472 /* Only add names if the 'standard' gromacs/iupac base name was found */
1475 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1476 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1477 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1478 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1483 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1484 strstr(watermodel, "4P")))
1488 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1489 strstr(watermodel, "5P")))
1498 aps = gmx_atomprop_init();
1499 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1500 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1505 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1508 printf("Analyzing pdb file\n");
1513 modify_chain_numbers(&pdba_all, chainsep[0]);
1517 this_atomname = NULL;
1519 this_resname = NULL;
1522 this_chainnumber = -1;
1523 this_chainstart = 0;
1524 /* Keep the compiler happy */
1525 prev_chainstart = 0;
1530 for (i = 0; (i < natom); i++)
1532 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1534 prev_atomname = this_atomname;
1535 prev_atomnum = this_atomnum;
1536 prev_resname = this_resname;
1537 prev_resnum = this_resnum;
1538 prev_chainid = this_chainid;
1539 prev_chainnumber = this_chainnumber;
1542 prev_chainstart = this_chainstart;
1545 this_atomname = *pdba_all.atomname[i];
1546 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1547 this_resname = *ri->name;
1548 this_resnum = ri->nr;
1549 this_chainid = ri->chainid;
1550 this_chainnumber = ri->chainnum;
1552 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1553 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1555 this_chainstart = pdba_all.atom[i].resind;
1560 if (!strncmp(merge[0], "int", 3))
1562 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1563 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1564 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1565 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1567 if (NULL == fgets(select, STRLEN-1, stdin))
1569 gmx_fatal(FARGS, "Error reading from stdin");
1571 bMerged = (select[0] == 'y');
1573 else if (!strncmp(merge[0], "all", 3))
1581 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1582 pdba_all.atom[i].resind - prev_chainstart;
1583 pdb_ch[nch-1].nterpairs++;
1584 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1589 /* set natom for previous chain */
1592 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1599 /* check if chain identifier was used before */
1600 for (j = 0; (j < nch); j++)
1602 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1604 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1605 "They will be treated as separate chains unless you reorder your file.\n",
1612 srenew(pdb_ch, maxch);
1614 pdb_ch[nch].chainid = ri->chainid;
1615 pdb_ch[nch].chainnum = ri->chainnum;
1616 pdb_ch[nch].start = i;
1617 pdb_ch[nch].bAllWat = bWat;
1620 pdb_ch[nch].nterpairs = 0;
1624 pdb_ch[nch].nterpairs = 1;
1626 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1627 /* modified [nch] to [0] below */
1628 pdb_ch[nch].chainstart[0] = 0;
1634 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1636 /* set all the water blocks at the end of the chain */
1637 snew(swap_index, nch);
1639 for (i = 0; i < nch; i++)
1641 if (!pdb_ch[i].bAllWat)
1647 for (i = 0; i < nch; i++)
1649 if (pdb_ch[i].bAllWat)
1655 if (nwaterchain > 1)
1657 printf("Moved all the water blocks to the end\n");
1661 /* copy pdb data and x for all chains */
1662 for (i = 0; (i < nch); i++)
1665 chains[i].chainid = pdb_ch[si].chainid;
1666 chains[i].chainnum = pdb_ch[si].chainnum;
1667 chains[i].bAllWat = pdb_ch[si].bAllWat;
1668 chains[i].nterpairs = pdb_ch[si].nterpairs;
1669 chains[i].chainstart = pdb_ch[si].chainstart;
1670 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1671 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1672 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1673 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1675 snew(chains[i].pdba, 1);
1676 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1677 snew(chains[i].x, chains[i].pdba->nr);
1678 for (j = 0; j < chains[i].pdba->nr; j++)
1680 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1681 snew(chains[i].pdba->atomname[j], 1);
1682 *chains[i].pdba->atomname[j] =
1683 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1684 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1685 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1687 /* Re-index the residues assuming that the indices are continuous */
1688 k = chains[i].pdba->atom[0].resind;
1689 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1690 chains[i].pdba->nres = nres;
1691 for (j = 0; j < chains[i].pdba->nr; j++)
1693 chains[i].pdba->atom[j].resind -= k;
1695 srenew(chains[i].pdba->resinfo, nres);
1696 for (j = 0; j < nres; j++)
1698 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1699 snew(chains[i].pdba->resinfo[j].name, 1);
1700 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1701 /* make all chain identifiers equal to that of the chain */
1702 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1706 if (nchainmerges > 0)
1708 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1712 printf("There are %d chains and %d blocks of water and "
1713 "%d residues with %d atoms\n",
1714 nch-nwaterchain, nwaterchain,
1715 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr, natom);
1717 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1718 for (i = 0; (i < nch); i++)
1720 printf(" %d '%c' %5d %6d %s\n",
1721 i+1, chains[i].chainid ? chains[i].chainid : '-',
1722 chains[i].pdba->nres, chains[i].pdba->nr,
1723 chains[i].bAllWat ? "(only water)" : "");
1727 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1729 /* Read atomtypes... */
1730 atype = read_atype(ffdir, &symtab);
1732 /* read residue database */
1733 printf("Reading residue database... (%s)\n", forcefield);
1734 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1737 for (i = 0; i < nrtpf; i++)
1739 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1745 /* Not correct with multiple rtp input files with different bonded types */
1746 fp = gmx_fio_fopen("new.rtp", "w");
1747 print_resall(fp, nrtp, restp, atype);
1751 /* read hydrogen database */
1752 nah = read_h_db(ffdir, &ah);
1754 /* Read Termini database... */
1755 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1756 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1758 top_fn = ftp2fn(efTOP, NFILE, fnm);
1759 top_file = gmx_fio_fopen(top_fn, "w");
1761 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1768 for (chain = 0; (chain < nch); chain++)
1770 cc = &(chains[chain]);
1772 /* set pdba, natom and nres to the current chain */
1775 natom = cc->pdba->nr;
1776 nres = cc->pdba->nres;
1778 if (cc->chainid && ( cc->chainid != ' ' ) )
1780 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1781 chain+1, cc->chainid, natom, nres);
1785 printf("Processing chain %d (%d atoms, %d residues)\n",
1786 chain+1, natom, nres);
1789 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1790 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1791 nrtprename, rtprename);
1793 cc->chainstart[cc->nterpairs] = pdba->nres;
1795 for (i = 0; i < cc->nterpairs; i++)
1797 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1798 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1800 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1806 if (cc->nterpairs == 0)
1808 printf("Problem with chain definition, or missing terminal residues.\n"
1809 "This chain does not appear to contain a recognized chain molecule.\n"
1810 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1813 /* Check for disulfides and other special bonds */
1814 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1818 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1826 sprintf(fn, "chain.pdb");
1830 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1832 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1836 for (i = 0; i < cc->nterpairs; i++)
1840 * We first apply a filter so we only have the
1841 * termini that can be applied to the residue in question
1842 * (or a generic terminus if no-residue specific is available).
1844 /* First the N terminus */
1847 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1848 *pdba->resinfo[cc->r_start[i]].name,
1849 *pdba->resinfo[cc->r_start[i]].rtp,
1853 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1854 "is already in a terminus-specific form and skipping terminus selection.\n");
1859 if (bTerMan && ntdblist > 1)
1861 sprintf(select, "Select start terminus type for %s-%d",
1862 *pdba->resinfo[cc->r_start[i]].name,
1863 pdba->resinfo[cc->r_start[i]].nr);
1864 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1868 cc->ntdb[i] = tdblist[0];
1871 printf("Start terminus %s-%d: %s\n",
1872 *pdba->resinfo[cc->r_start[i]].name,
1873 pdba->resinfo[cc->r_start[i]].nr,
1874 (cc->ntdb[i])->name);
1883 /* And the C terminus */
1886 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1887 *pdba->resinfo[cc->r_end[i]].name,
1888 *pdba->resinfo[cc->r_end[i]].rtp,
1892 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1893 "is already in a terminus-specific form and skipping terminus selection.\n");
1898 if (bTerMan && ntdblist > 1)
1900 sprintf(select, "Select end terminus type for %s-%d",
1901 *pdba->resinfo[cc->r_end[i]].name,
1902 pdba->resinfo[cc->r_end[i]].nr);
1903 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1907 cc->ctdb[i] = tdblist[0];
1909 printf("End terminus %s-%d: %s\n",
1910 *pdba->resinfo[cc->r_end[i]].name,
1911 pdba->resinfo[cc->r_end[i]].nr,
1912 (cc->ctdb[i])->name);
1921 /* lookup hackblocks and rtp for all residues */
1922 get_hackblocks_rtp(&hb_chain, &restp_chain,
1923 nrtp, restp, pdba->nres, pdba->resinfo,
1924 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1925 /* ideally, now we would not need the rtp itself anymore, but do
1926 everything using the hb and restp arrays. Unfortunately, that
1927 requires some re-thinking of code in gen_vsite.c, which I won't
1928 do now :( AF 26-7-99 */
1930 rename_atoms(NULL, ffdir,
1931 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1933 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1937 block = new_blocka();
1939 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1940 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1941 if (ftp2bSet(efNDX, NFILE, fnm))
1945 fprintf(stderr, "WARNING: with the -remh option the generated "
1946 "index file (%s) might be useless\n"
1947 "(the index file is generated before hydrogens are added)",
1948 ftp2fn(efNDX, NFILE, fnm));
1950 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1952 for (i = 0; i < block->nr; i++)
1961 fprintf(stderr, "WARNING: "
1962 "without sorting no check for duplicate atoms can be done\n");
1965 /* Generate Hydrogen atoms (and termini) in the sequence */
1966 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1967 natom = add_h(&pdba, &x, nah, ah,
1968 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1969 NULL, NULL, TRUE, FALSE);
1970 printf("Now there are %d residues with %d atoms\n",
1971 pdba->nres, pdba->nr);
1974 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1979 for (i = 0; (i < natom); i++)
1981 fprintf(debug, "Res %s%d atom %d %s\n",
1982 *(pdba->resinfo[pdba->atom[i].resind].name),
1983 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1987 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1989 /* make up molecule name(s) */
1991 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1993 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
1999 sprintf(molname, "Water");
2003 this_chainid = cc->chainid;
2005 /* Add the chain id if we have one */
2006 if (this_chainid != ' ')
2008 sprintf(buf, "_chain_%c", this_chainid);
2009 strcat(suffix, buf);
2012 /* Check if there have been previous chains with the same id */
2014 for (k = 0; k < chain; k++)
2016 if (cc->chainid == chains[k].chainid)
2021 /* Add the number for this chain identifier if there are multiple copies */
2025 sprintf(buf, "%d", nid_used+1);
2026 strcat(suffix, buf);
2029 if (strlen(suffix) > 0)
2031 sprintf(molname, "%s%s", p_restype, suffix);
2035 strcpy(molname, p_restype);
2039 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2042 strcpy(itp_fn, top_fn);
2043 printf("Chain time...\n");
2044 c = strrchr(itp_fn, '.');
2045 sprintf(c, "_%s.itp", molname);
2046 c = strrchr(posre_fn, '.');
2047 sprintf(c, "_%s.itp", molname);
2048 if (strcmp(itp_fn, posre_fn) == 0)
2050 strcpy(buf_fn, posre_fn);
2051 c = strrchr(buf_fn, '.');
2053 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2057 srenew(incls, nincl);
2058 incls[nincl-1] = strdup(itp_fn);
2059 itp_file = gmx_fio_fopen(itp_fn, "w");
2066 srenew(mols, nmol+1);
2069 mols[nmol].name = strdup("SOL");
2070 mols[nmol].nr = pdba->nres;
2074 mols[nmol].name = strdup(molname);
2081 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2091 top_file2 = itp_file;
2095 top_file2 = top_file;
2098 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2100 restp_chain, hb_chain,
2102 bVsites, bVsiteAromatics, ffdir,
2103 mHmult, nssbonds, ssbonds,
2104 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2105 bRenumRes, bRTPresname);
2109 write_posres(posre_fn, pdba, posre_fc);
2114 gmx_fio_fclose(itp_file);
2117 /* pdba and natom have been reassigned somewhere so: */
2123 if (cc->chainid == ' ')
2125 sprintf(fn, "chain.pdb");
2129 sprintf(fn, "chain_%c.pdb", cc->chainid);
2131 cool_quote(quote, 255, NULL);
2132 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2136 if (watermodel == NULL)
2138 for (chain = 0; chain < nch; chain++)
2140 if (chains[chain].bAllWat)
2142 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2148 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2149 if (!fflib_fexist(buf_fn))
2151 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2152 buf_fn, watermodel);
2156 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2157 gmx_fio_fclose(top_file);
2159 gmx_residuetype_destroy(rt);
2161 /* now merge all chains back together */
2164 for (i = 0; (i < nch); i++)
2166 natom += chains[i].pdba->nr;
2167 nres += chains[i].pdba->nres;
2170 init_t_atoms(atoms, natom, FALSE);
2171 for (i = 0; i < atoms->nres; i++)
2173 sfree(atoms->resinfo[i].name);
2175 sfree(atoms->resinfo);
2177 snew(atoms->resinfo, nres);
2181 for (i = 0; (i < nch); i++)
2185 printf("Including chain %d in system: %d atoms %d residues\n",
2186 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2188 for (j = 0; (j < chains[i].pdba->nr); j++)
2190 atoms->atom[k] = chains[i].pdba->atom[j];
2191 atoms->atom[k].resind += l; /* l is processed nr of residues */
2192 atoms->atomname[k] = chains[i].pdba->atomname[j];
2193 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2194 copy_rvec(chains[i].x[j], x[k]);
2197 for (j = 0; (j < chains[i].pdba->nres); j++)
2199 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2202 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2210 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2211 print_sums(atoms, TRUE);
2214 fprintf(stderr, "\nWriting coordinate file...\n");
2215 clear_rvec(box_space);
2218 make_new_box(atoms->nr, x, box, box_space, FALSE);
2220 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2222 printf("\t\t--------- PLEASE NOTE ------------\n");
2223 printf("You have successfully generated a topology from: %s.\n",
2224 opt2fn("-f", NFILE, fnm));
2225 if (watermodel != NULL)
2227 printf("The %s force field and the %s water model are used.\n",
2228 ffname, watermodel);
2232 printf("The %s force field is used.\n",
2235 printf("\t\t--------- ETON ESAELP ------------\n");