2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/fileio/strdb.h"
51 #include "gromacs/gmxlib/conformation-utilities.h"
52 #include "gromacs/gmxpreprocess/fflibutil.h"
53 #include "gromacs/gmxpreprocess/genhydro.h"
54 #include "gromacs/gmxpreprocess/h_db.h"
55 #include "gromacs/gmxpreprocess/hizzie.h"
56 #include "gromacs/gmxpreprocess/pdb2top.h"
57 #include "gromacs/gmxpreprocess/pgutil.h"
58 #include "gromacs/gmxpreprocess/resall.h"
59 #include "gromacs/gmxpreprocess/specbond.h"
60 #include "gromacs/gmxpreprocess/ter_db.h"
61 #include "gromacs/gmxpreprocess/toputil.h"
62 #include "gromacs/gmxpreprocess/xlate.h"
63 #include "gromacs/legacyheaders/copyrite.h"
64 #include "gromacs/legacyheaders/macros.h"
65 #include "gromacs/legacyheaders/readinp.h"
66 #include "gromacs/legacyheaders/typedefs.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/topology/atomprop.h"
69 #include "gromacs/topology/block.h"
70 #include "gromacs/topology/index.h"
71 #include "gromacs/topology/residuetypes.h"
72 #include "gromacs/topology/symtab.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/dir_separator.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/smalloc.h"
80 char gmx[RTP_MAXCHAR+2];
81 char main[RTP_MAXCHAR+2];
82 char nter[RTP_MAXCHAR+2];
83 char cter[RTP_MAXCHAR+2];
84 char bter[RTP_MAXCHAR+2];
88 static const char *res2bb_notermini(const char *name,
89 int nrr, const rtprename_t *rr)
91 /* NOTE: This function returns the main building block name,
92 * it does not take terminal renaming into account.
97 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
102 return (i < nrr ? rr[i].main : name);
105 static const char *select_res(int nr, int resnr,
106 const char *name[], const char *expl[],
108 int nrr, const rtprename_t *rr)
112 printf("Which %s type do you want for residue %d\n", title, resnr+1);
113 for (sel = 0; (sel < nr); sel++)
115 printf("%d. %s (%s)\n",
116 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
118 printf("\nType a number:"); fflush(stdout);
120 if (scanf("%d", &sel) != 1)
122 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
128 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
133 const char *lh[easpNR] = { "ASP", "ASPH" };
134 const char *expl[easpNR] = {
135 "Not protonated (charge -1)",
136 "Protonated (charge 0)"
139 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
142 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
147 const char *lh[egluNR] = { "GLU", "GLUH" };
148 const char *expl[egluNR] = {
149 "Not protonated (charge -1)",
150 "Protonated (charge 0)"
153 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
156 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
161 const char *lh[eglnNR] = { "GLN", "QLN" };
162 const char *expl[eglnNR] = {
163 "Not protonated (charge 0)",
164 "Protonated (charge +1)"
167 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
170 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
175 const char *lh[elysNR] = { "LYSN", "LYS" };
176 const char *expl[elysNR] = {
177 "Not protonated (charge 0)",
178 "Protonated (charge +1)"
181 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
184 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
189 const char *lh[eargNR] = { "ARGN", "ARG" };
190 const char *expl[eargNR] = {
191 "Not protonated (charge 0)",
192 "Protonated (charge +1)"
195 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
198 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
200 const char *expl[ehisNR] = {
207 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
210 static void read_rtprename(const char *fname, FILE *fp,
211 int *nrtprename, rtprename_t **rtprename)
213 char line[STRLEN], buf[STRLEN];
222 while (get_a_line(fp, line, STRLEN))
225 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
226 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
227 * Note that the buffer length has been increased to 7 to allow this,
228 * so we just need to make sure the strings have zero-length initially.
231 rr[n].main[0] = '\0';
232 rr[n].nter[0] = '\0';
233 rr[n].cter[0] = '\0';
234 rr[n].bter[0] = '\0';
235 nc = sscanf(line, "%6s %6s %6s %6s %6s %s",
236 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
237 if (strlen(rr[n].gmx) > RTP_MAXCHAR || strlen(rr[n].main) > RTP_MAXCHAR ||
238 strlen(rr[n].nter) > RTP_MAXCHAR || strlen(rr[n].cter) > RTP_MAXCHAR || strlen(rr[n].bter) > RTP_MAXCHAR)
240 gmx_fatal(FARGS, "Residue renaming database '%s' has strings longer than %d chars in first 5 columns:\n%s",
241 fname, RTP_MAXCHAR, line);
246 if (nc != 2 && nc != 5)
248 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
254 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
259 /* This file does not have special termini names, copy them from main */
260 strcpy(rr[n].nter, rr[n].main);
261 strcpy(rr[n].cter, rr[n].main);
262 strcpy(rr[n].bter, rr[n].main);
272 static char *search_resrename(int nrr, rtprename_t *rr,
274 gmx_bool bStart, gmx_bool bEnd,
275 gmx_bool bCompareFFRTPname)
283 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
284 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
289 /* If found in the database, rename this residue's rtp building block,
290 * otherwise keep the old name.
313 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
321 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
322 int nrr, rtprename_t *rr, t_symtab *symtab,
326 gmx_bool bStart, bEnd;
328 gmx_bool bFFRTPTERRNM;
330 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
332 for (r = 0; r < pdba->nres; r++)
336 for (j = 0; j < nterpairs; j++)
343 for (j = 0; j < nterpairs; j++)
351 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
353 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
355 /* This is a terminal residue, but the residue name,
356 * currently stored in .rtp, is not a standard residue name,
357 * but probably a force field specific rtp name.
358 * Check if we need to rename it because it is terminal.
360 nn = search_resrename(nrr, rr,
361 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
364 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
368 printf("Changing rtp entry of residue %d %s to '%s'\n",
369 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
371 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
376 static void pdbres_to_gmxrtp(t_atoms *pdba)
380 for (i = 0; (i < pdba->nres); i++)
382 if (pdba->resinfo[i].rtp == NULL)
384 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
389 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
390 gmx_bool bFullCompare, t_symtab *symtab)
395 for (i = 0; (i < pdba->nres); i++)
397 resnm = *pdba->resinfo[i].name;
398 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
399 (!bFullCompare && strstr(resnm, oldnm) != NULL))
401 /* Rename the residue name (not the rtp name) */
402 pdba->resinfo[i].name = put_symtab(symtab, newnm);
407 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
408 gmx_bool bFullCompare, t_symtab *symtab)
413 for (i = 0; (i < pdba->nres); i++)
415 /* We have not set the rtp name yes, use the residue name */
416 bbnm = *pdba->resinfo[i].name;
417 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
418 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
420 /* Change the rtp builing block name */
421 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
426 static void rename_bbint(t_atoms *pdba, const char *oldnm,
427 const char *gettp(int, int, const rtprename_t *),
428 gmx_bool bFullCompare,
430 int nrr, const rtprename_t *rr)
436 for (i = 0; i < pdba->nres; i++)
438 /* We have not set the rtp name yes, use the residue name */
439 bbnm = *pdba->resinfo[i].name;
440 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
441 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
443 ptr = gettp(i, nrr, rr);
444 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
449 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
455 ftp = fn2ftp(filename);
456 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
458 fprintf(stderr, "No occupancies in %s\n", filename);
462 for (i = 0; (i < atoms->nr); i++)
464 if (atoms->pdbinfo[i].occup != 1)
468 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
469 *atoms->resinfo[atoms->atom[i].resind].name,
470 atoms->resinfo[ atoms->atom[i].resind].nr,
472 atoms->pdbinfo[i].occup);
474 if (atoms->pdbinfo[i].occup == 0)
484 if (nzero == atoms->nr)
486 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
488 else if ((nzero > 0) || (nnotone > 0))
492 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
493 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
495 nzero, nnotone, atoms->nr);
499 fprintf(stderr, "All occupancies are one\n");
504 void write_posres(char *fn, t_atoms *pdba, real fc)
509 fp = gmx_fio_fopen(fn, "w");
511 "; In this topology include file, you will find position restraint\n"
512 "; entries for all the heavy atoms in your original pdb file.\n"
513 "; This means that all the protons which were added by pdb2gmx are\n"
514 "; not restrained.\n"
516 "[ position_restraints ]\n"
517 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
519 for (i = 0; (i < pdba->nr); i++)
521 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
523 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
529 static int read_pdball(const char *inf, const char *outf, char *title,
530 t_atoms *atoms, rvec **x,
531 int *ePBC, matrix box, gmx_bool bRemoveH,
532 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
533 gmx_atomprop_t aps, gmx_bool bVerbose)
534 /* Read a pdb file. (containing proteins) */
536 int natom, new_natom, i;
539 printf("Reading %s...\n", inf);
540 get_stx_coordnum(inf, &natom);
541 init_t_atoms(atoms, natom, TRUE);
543 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
544 if (fn2ftp(inf) == efPDB)
546 get_pdb_atomnumber(atoms, aps);
551 for (i = 0; i < atoms->nr; i++)
553 if (!is_hydrogen(*atoms->atomname[i]))
555 atoms->atom[new_natom] = atoms->atom[i];
556 atoms->atomname[new_natom] = atoms->atomname[i];
557 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
558 copy_rvec((*x)[i], (*x)[new_natom]);
562 atoms->nr = new_natom;
567 if (title && title[0])
569 printf(" '%s',", title);
571 printf(" %d atoms\n", natom);
573 /* Rename residues */
574 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
575 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
576 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
578 rename_atoms("xlateat.dat", NULL,
579 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
588 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
594 void process_chain(t_atoms *pdba, rvec *x,
595 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
596 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
597 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
598 real angle, real distance, t_symtab *symtab,
599 int nrr, const rtprename_t *rr)
601 /* Rename aromatics, lys, asp and histidine */
604 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
608 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
612 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
616 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
620 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
624 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
628 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
632 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
636 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
640 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
645 set_histp(pdba, x, angle, distance);
649 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
652 /* Initialize the rtp builing block names with the residue names
653 * for the residues that have not been processed above.
655 pdbres_to_gmxrtp(pdba);
657 /* Now we have all rtp names set.
658 * The rtp names will conform to Gromacs naming,
659 * unless the input pdb file contained one or more force field specific
660 * rtp names as residue names.
664 /* struct for sorting the atoms from the pdb file */
666 int resnr; /* residue number */
667 int j; /* database order index */
668 int index; /* original atom number */
669 char anm1; /* second letter of atom name */
670 char altloc; /* alternate location indicator */
673 int pdbicomp(const void *a, const void *b)
678 pa = (t_pdbindex *)a;
679 pb = (t_pdbindex *)b;
681 d = (pa->resnr - pb->resnr);
687 d = (pa->anm1 - pb->anm1);
690 d = (pa->altloc - pb->altloc);
698 static void sort_pdbatoms(t_restp restp[],
699 int natoms, t_atoms **pdbaptr, rvec **x,
700 t_blocka *block, char ***gnames)
702 t_atoms *pdba, *pdbnew;
717 for (i = 0; i < natoms; i++)
719 atomnm = *pdba->atomname[i];
720 rptr = &restp[pdba->atom[i].resind];
721 for (j = 0; (j < rptr->natom); j++)
723 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
728 if (j == rptr->natom)
733 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
734 "while sorting atoms.\n%s", atomnm,
735 *pdba->resinfo[pdba->atom[i].resind].name,
736 pdba->resinfo[pdba->atom[i].resind].nr,
739 is_hydrogen(atomnm) ?
740 "\nFor a hydrogen, this can be a different protonation state, or it\n"
741 "might have had a different number in the PDB file and was rebuilt\n"
742 "(it might for instance have been H3, and we only expected H1 & H2).\n"
743 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
744 "Remove this hydrogen or choose a different protonation state to solve it.\n"
745 "Option -ignh will ignore all hydrogens in the input." : ".");
746 gmx_fatal(FARGS, buf);
748 /* make shadow array to be sorted into indexgroup */
749 pdbi[i].resnr = pdba->atom[i].resind;
752 pdbi[i].anm1 = atomnm[1];
753 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
755 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
757 /* pdba is sorted in pdbnew using the pdbi index */
760 init_t_atoms(pdbnew, natoms, TRUE);
762 pdbnew->nr = pdba->nr;
763 pdbnew->nres = pdba->nres;
764 sfree(pdbnew->resinfo);
765 pdbnew->resinfo = pdba->resinfo;
766 for (i = 0; i < natoms; i++)
768 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
769 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
770 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
771 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
772 /* make indexgroup in block */
773 a[i] = pdbi[i].index;
776 sfree(pdba->atomname);
778 sfree(pdba->pdbinfo);
781 /* copy the sorted pdbnew back to pdba */
784 add_grp(block, gnames, natoms, a, "prot_sort");
790 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
792 int i, j, oldnatoms, ndel;
795 printf("Checking for duplicate atoms....\n");
796 oldnatoms = pdba->nr;
798 /* NOTE: pdba->nr is modified inside the loop */
799 for (i = 1; (i < pdba->nr); i++)
801 /* compare 'i' and 'i-1', throw away 'i' if they are identical
802 this is a 'while' because multiple alternate locations can be present */
803 while ( (i < pdba->nr) &&
804 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
805 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
810 ri = &pdba->resinfo[pdba->atom[i].resind];
811 printf("deleting duplicate atom %4s %s%4d%c",
812 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
813 if (ri->chainid && (ri->chainid != ' '))
815 printf(" ch %c", ri->chainid);
819 if (pdba->pdbinfo[i].atomnr)
821 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
823 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
825 printf(" altloc %c", pdba->pdbinfo[i].altloc);
831 /* We can not free, since it might be in the symtab */
832 /* sfree(pdba->atomname[i]); */
833 for (j = i; j < pdba->nr; j++)
835 pdba->atom[j] = pdba->atom[j+1];
836 pdba->atomname[j] = pdba->atomname[j+1];
837 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
838 copy_rvec(x[j+1], x[j]);
840 srenew(pdba->atom, pdba->nr);
841 /* srenew(pdba->atomname, pdba->nr); */
842 srenew(pdba->pdbinfo, pdba->nr);
845 if (pdba->nr != oldnatoms)
847 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
853 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
854 gmx_residuetype_t *rt)
857 const char *p_startrestype;
858 const char *p_restype;
859 int nstartwarn, nendwarn;
867 /* Find the starting terminus (typially N or 5') */
868 for (i = r0; i < r1 && *r_start == -1; i++)
870 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
871 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
873 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
880 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
884 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
892 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
893 for (i = *r_start; i < r1; i++)
895 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
896 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
904 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
905 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
906 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
910 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
919 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
925 modify_chain_numbers(t_atoms * pdba,
926 const char * chainsep)
929 char old_prev_chainid;
930 char old_this_chainid;
931 int old_prev_chainnum;
932 int old_this_chainnum;
938 const char * prev_atomname;
939 const char * this_atomname;
940 const char * prev_resname;
941 const char * this_resname;
946 int prev_chainnumber;
947 int this_chainnumber;
959 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
961 /* Be a bit flexible to catch typos */
962 if (!strncmp(chainsep, "id_o", 4))
964 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
965 splitting = SPLIT_ID_OR_TER;
966 printf("Splitting chemical chains based on TER records or chain id changing.\n");
968 else if (!strncmp(chainsep, "int", 3))
970 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
971 splitting = SPLIT_INTERACTIVE;
972 printf("Splitting chemical chains interactively.\n");
974 else if (!strncmp(chainsep, "id_a", 4))
976 splitting = SPLIT_ID_AND_TER;
977 printf("Splitting chemical chains based on TER records and chain id changing.\n");
979 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
981 splitting = SPLIT_ID_ONLY;
982 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
984 else if (chainsep[0] == 't')
986 splitting = SPLIT_TER_ONLY;
987 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
991 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
994 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
996 old_prev_chainid = '?';
997 old_prev_chainnum = -1;
1000 this_atomname = NULL;
1002 this_resname = NULL;
1005 this_chainnumber = -1;
1007 for (i = 0; i < pdba->nres; i++)
1009 ri = &pdba->resinfo[i];
1010 old_this_chainid = ri->chainid;
1011 old_this_chainnum = ri->chainnum;
1013 prev_atomname = this_atomname;
1014 prev_atomnum = this_atomnum;
1015 prev_resname = this_resname;
1016 prev_resnum = this_resnum;
1017 prev_chainid = this_chainid;
1018 prev_chainnumber = this_chainnumber;
1020 this_atomname = *(pdba->atomname[i]);
1021 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1022 this_resname = *ri->name;
1023 this_resnum = ri->nr;
1024 this_chainid = ri->chainid;
1025 this_chainnumber = ri->chainnum;
1029 case SPLIT_ID_OR_TER:
1030 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1036 case SPLIT_ID_AND_TER:
1037 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1044 if (old_this_chainid != old_prev_chainid)
1050 case SPLIT_TER_ONLY:
1051 if (old_this_chainnum != old_prev_chainnum)
1056 case SPLIT_INTERACTIVE:
1057 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1061 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1062 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1063 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1064 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1066 if (NULL == fgets(select, STRLEN-1, stdin))
1068 gmx_fatal(FARGS, "Error reading from stdin");
1071 if (i == 0 || select[0] == 'y')
1078 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1081 old_prev_chainid = old_this_chainid;
1082 old_prev_chainnum = old_this_chainnum;
1084 ri->chainnum = new_chainnum;
1113 int gmx_pdb2gmx(int argc, char *argv[])
1115 const char *desc[] = {
1116 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1117 "some database files, adds hydrogens to the molecules and generates",
1118 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1119 "and a topology in GROMACS format.",
1120 "These files can subsequently be processed to generate a run input file.",
1122 "[THISMODULE] will search for force fields by looking for",
1123 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1124 "of the current working directory and of the GROMACS library directory",
1125 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1127 "By default the forcefield selection is interactive,",
1128 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1129 "in the list on the command line instead. In that case [THISMODULE] just looks",
1130 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1132 "After choosing a force field, all files will be read only from",
1133 "the corresponding force field directory.",
1134 "If you want to modify or add a residue types, you can copy the force",
1135 "field directory from the GROMACS library directory to your current",
1136 "working directory. If you want to add new protein residue types,",
1137 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1138 "or copy the whole library directory to a local directory and set",
1139 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1140 "Check Chapter 5 of the manual for more information about file formats.",
1143 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1144 "need not necessarily contain a protein structure. Every kind of",
1145 "molecule for which there is support in the database can be converted.",
1146 "If there is no support in the database, you can add it yourself.[PAR]",
1148 "The program has limited intelligence, it reads a number of database",
1149 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1150 "if necessary this can be done manually. The program can prompt the",
1151 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1152 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1153 "protonated (three protons, default), for Asp and Glu unprotonated",
1154 "(default) or protonated, for His the proton can be either on ND1,",
1155 "on NE2 or on both. By default these selections are done automatically.",
1156 "For His, this is based on an optimal hydrogen bonding",
1157 "conformation. Hydrogen bonds are defined based on a simple geometric",
1158 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1159 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1160 "[TT]-dist[tt] respectively.[PAR]",
1162 "The protonation state of N- and C-termini can be chosen interactively",
1163 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1164 "respectively. Some force fields support zwitterionic forms for chains of",
1165 "one residue, but for polypeptides these options should NOT be selected.",
1166 "The AMBER force fields have unique forms for the terminal residues,",
1167 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1168 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1169 "respectively to use these forms, making sure you preserve the format",
1170 "of the coordinate file. Alternatively, use named terminating residues",
1171 "(e.g. ACE, NME).[PAR]",
1173 "The separation of chains is not entirely trivial since the markup",
1174 "in user-generated PDB files frequently varies and sometimes it",
1175 "is desirable to merge entries across a TER record, for instance",
1176 "if you want a disulfide bridge or distance restraints between",
1177 "two protein chains or if you have a HEME group bound to a protein.",
1178 "In such cases multiple chains should be contained in a single",
1179 "[TT]moleculetype[tt] definition.",
1180 "To handle this, [THISMODULE] uses two separate options.",
1181 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1182 "start, and termini added when applicable. This can be done based on the",
1183 "existence of TER records, when the chain id changes, or combinations of either",
1184 "or both of these. You can also do the selection fully interactively.",
1185 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1186 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1187 "This can be turned off (no merging), all non-water chains can be merged into a",
1188 "single molecule, or the selection can be done interactively.[PAR]",
1190 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1191 "If any of the occupancies are not one, indicating that the atom is",
1192 "not resolved well in the structure, a warning message is issued.",
1193 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1194 "all occupancy fields may be zero. Either way, it is up to the user",
1195 "to verify the correctness of the input data (read the article!).[PAR]",
1197 "During processing the atoms will be reordered according to GROMACS",
1198 "conventions. With [TT]-n[tt] an index file can be generated that",
1199 "contains one group reordered in the same way. This allows you to",
1200 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1201 "one limitation: reordering is done after the hydrogens are stripped",
1202 "from the input and before new hydrogens are added. This means that",
1203 "you should not use [TT]-ignh[tt].[PAR]",
1205 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1206 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1207 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1210 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1211 "motions. Angular and out-of-plane motions can be removed by changing",
1212 "hydrogens into virtual sites and fixing angles, which fixes their",
1213 "position relative to neighboring atoms. Additionally, all atoms in the",
1214 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1215 "can be converted into virtual sites, eliminating the fast improper dihedral",
1216 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1217 "atoms are also converted to virtual sites. The mass of all atoms that are",
1218 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1219 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1220 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1221 "done for water hydrogens to slow down the rotational motion of water.",
1222 "The increase in mass of the hydrogens is subtracted from the bonded",
1223 "(heavy) atom so that the total mass of the system remains the same."
1227 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1229 t_atoms pdba_all, *pdba;
1233 int chain, nch, maxch, nwaterchain;
1235 t_chain *chains, *cc;
1236 char select[STRLEN];
1244 int i, j, k, l, nrtp;
1245 int *swap_index, si;
1249 gpp_atomtype_t atype;
1250 gmx_residuetype_t*rt;
1252 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1253 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1254 char *c, forcefield[STRLEN], ffdir[STRLEN];
1255 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1263 int nrtprename, naa;
1264 rtprename_t *rtprename = NULL;
1265 int nah, nNtdb, nCtdb, ntdblist;
1266 t_hackblock *ntdb, *ctdb, **tdblist;
1270 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1272 t_hackblock *hb_chain;
1273 t_restp *restp_chain;
1275 const char *p_restype;
1279 const char * prev_atomname;
1280 const char * this_atomname;
1281 const char * prev_resname;
1282 const char * this_resname;
1287 int prev_chainnumber;
1288 int this_chainnumber;
1290 int this_chainstart;
1291 int prev_chainstart;
1298 { efSTX, "-f", "eiwit.pdb", ffREAD },
1299 { efSTO, "-o", "conf", ffWRITE },
1300 { efTOP, NULL, NULL, ffWRITE },
1301 { efITP, "-i", "posre", ffWRITE },
1302 { efNDX, "-n", "clean", ffOPTWR },
1303 { efSTO, "-q", "clean.pdb", ffOPTWR }
1305 #define NFILE asize(fnm)
1308 /* Command line arguments must be static */
1309 static gmx_bool bNewRTP = FALSE;
1310 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1311 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1312 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1313 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1314 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1315 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1316 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1317 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1318 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1319 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1320 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1321 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1322 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1323 static const char *ff = "select";
1326 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1327 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1328 { "-lb", FALSE, etREAL, {&long_bond_dist},
1329 "HIDDENLong bond warning distance" },
1330 { "-sb", FALSE, etREAL, {&short_bond_dist},
1331 "HIDDENShort bond warning distance" },
1332 { "-chainsep", FALSE, etENUM, {chainsep},
1333 "Condition in PDB files when a new chain should be started (adding termini)" },
1334 { "-merge", FALSE, etENUM, {&merge},
1335 "Merge multiple chains into a single [moleculetype]" },
1336 { "-ff", FALSE, etSTR, {&ff},
1337 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1338 { "-water", FALSE, etENUM, {watstr},
1339 "Water model to use" },
1340 { "-inter", FALSE, etBOOL, {&bInter},
1341 "Set the next 8 options to interactive"},
1342 { "-ss", FALSE, etBOOL, {&bCysMan},
1343 "Interactive SS bridge selection" },
1344 { "-ter", FALSE, etBOOL, {&bTerMan},
1345 "Interactive termini selection, instead of charged (default)" },
1346 { "-lys", FALSE, etBOOL, {&bLysMan},
1347 "Interactive lysine selection, instead of charged" },
1348 { "-arg", FALSE, etBOOL, {&bArgMan},
1349 "Interactive arginine selection, instead of charged" },
1350 { "-asp", FALSE, etBOOL, {&bAspMan},
1351 "Interactive aspartic acid selection, instead of charged" },
1352 { "-glu", FALSE, etBOOL, {&bGluMan},
1353 "Interactive glutamic acid selection, instead of charged" },
1354 { "-gln", FALSE, etBOOL, {&bGlnMan},
1355 "Interactive glutamine selection, instead of neutral" },
1356 { "-his", FALSE, etBOOL, {&bHisMan},
1357 "Interactive histidine selection, instead of checking H-bonds" },
1358 { "-angle", FALSE, etREAL, {&angle},
1359 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1360 { "-dist", FALSE, etREAL, {&distance},
1361 "Maximum donor-acceptor distance for a H-bond (nm)" },
1362 { "-una", FALSE, etBOOL, {&bUnA},
1363 "Select aromatic rings with united CH atoms on phenylalanine, "
1364 "tryptophane and tyrosine" },
1365 { "-sort", FALSE, etBOOL, {&bSort},
1366 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1367 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1368 "Ignore hydrogen atoms that are in the coordinate file" },
1369 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1370 "Continue when atoms are missing, dangerous" },
1371 { "-v", FALSE, etBOOL, {&bVerbose},
1372 "Be slightly more verbose in messages" },
1373 { "-posrefc", FALSE, etREAL, {&posre_fc},
1374 "Force constant for position restraints" },
1375 { "-vsite", FALSE, etENUM, {vsitestr},
1376 "Convert atoms to virtual sites" },
1377 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1378 "Make hydrogen atoms heavy" },
1379 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1380 "Change the mass of hydrogens to 2 amu" },
1381 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1382 "Use charge groups in the [REF].rtp[ref] file" },
1383 { "-cmap", TRUE, etBOOL, {&bCmap},
1384 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)" },
1385 { "-renum", TRUE, etBOOL, {&bRenumRes},
1386 "Renumber the residues consecutively in the output" },
1387 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1388 "Use [REF].rtp[ref] entry names as residue names" }
1390 #define NPARGS asize(pa)
1392 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1398 /* Force field selection, interactive or direct */
1399 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1400 forcefield, sizeof(forcefield),
1401 ffdir, sizeof(ffdir));
1403 if (strlen(forcefield) > 0)
1405 strcpy(ffname, forcefield);
1406 ffname[0] = toupper(ffname[0]);
1410 gmx_fatal(FARGS, "Empty forcefield string");
1413 printf("\nUsing the %s force field in directory %s\n\n",
1416 choose_watermodel(watstr[0], ffdir, &watermodel);
1420 /* if anything changes here, also change description of -inter */
1435 else if (bDeuterate)
1444 switch (vsitestr[0][0])
1446 case 'n': /* none */
1448 bVsiteAromatics = FALSE;
1450 case 'h': /* hydrogens */
1452 bVsiteAromatics = FALSE;
1454 case 'a': /* aromatics */
1456 bVsiteAromatics = TRUE;
1459 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1460 __FILE__, __LINE__, vsitestr[0]);
1463 /* Open the symbol table */
1464 open_symtab(&symtab);
1466 /* Residue type database */
1467 gmx_residuetype_init(&rt);
1469 /* Read residue renaming database(s), if present */
1470 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1474 for (i = 0; i < nrrn; i++)
1476 fp = fflib_open(rrn[i]);
1477 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1483 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1485 for (i = 0; i < nrtprename; i++)
1487 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1489 /* Only add names if the 'standard' gromacs/iupac base name was found */
1492 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1493 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1494 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1495 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1500 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1501 strstr(watermodel, "4P")))
1505 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1506 strstr(watermodel, "5P")))
1515 aps = gmx_atomprop_init();
1516 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1517 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1522 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1525 printf("Analyzing pdb file\n");
1530 modify_chain_numbers(&pdba_all, chainsep[0]);
1534 this_atomname = NULL;
1536 this_resname = NULL;
1539 this_chainnumber = -1;
1540 this_chainstart = 0;
1541 /* Keep the compiler happy */
1542 prev_chainstart = 0;
1547 for (i = 0; (i < natom); i++)
1549 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1551 prev_atomname = this_atomname;
1552 prev_atomnum = this_atomnum;
1553 prev_resname = this_resname;
1554 prev_resnum = this_resnum;
1555 prev_chainid = this_chainid;
1556 prev_chainnumber = this_chainnumber;
1559 prev_chainstart = this_chainstart;
1562 this_atomname = *pdba_all.atomname[i];
1563 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1564 this_resname = *ri->name;
1565 this_resnum = ri->nr;
1566 this_chainid = ri->chainid;
1567 this_chainnumber = ri->chainnum;
1569 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1570 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1572 this_chainstart = pdba_all.atom[i].resind;
1577 if (!strncmp(merge[0], "int", 3))
1579 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1580 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1581 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1582 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1584 if (NULL == fgets(select, STRLEN-1, stdin))
1586 gmx_fatal(FARGS, "Error reading from stdin");
1588 bMerged = (select[0] == 'y');
1590 else if (!strncmp(merge[0], "all", 3))
1598 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1599 pdba_all.atom[i].resind - prev_chainstart;
1600 pdb_ch[nch-1].nterpairs++;
1601 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1606 /* set natom for previous chain */
1609 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1616 /* check if chain identifier was used before */
1617 for (j = 0; (j < nch); j++)
1619 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1621 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1622 "They will be treated as separate chains unless you reorder your file.\n",
1629 srenew(pdb_ch, maxch);
1631 pdb_ch[nch].chainid = ri->chainid;
1632 pdb_ch[nch].chainnum = ri->chainnum;
1633 pdb_ch[nch].start = i;
1634 pdb_ch[nch].bAllWat = bWat;
1637 pdb_ch[nch].nterpairs = 0;
1641 pdb_ch[nch].nterpairs = 1;
1643 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1644 /* modified [nch] to [0] below */
1645 pdb_ch[nch].chainstart[0] = 0;
1651 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1653 /* set all the water blocks at the end of the chain */
1654 snew(swap_index, nch);
1656 for (i = 0; i < nch; i++)
1658 if (!pdb_ch[i].bAllWat)
1664 for (i = 0; i < nch; i++)
1666 if (pdb_ch[i].bAllWat)
1672 if (nwaterchain > 1)
1674 printf("Moved all the water blocks to the end\n");
1678 /* copy pdb data and x for all chains */
1679 for (i = 0; (i < nch); i++)
1682 chains[i].chainid = pdb_ch[si].chainid;
1683 chains[i].chainnum = pdb_ch[si].chainnum;
1684 chains[i].bAllWat = pdb_ch[si].bAllWat;
1685 chains[i].nterpairs = pdb_ch[si].nterpairs;
1686 chains[i].chainstart = pdb_ch[si].chainstart;
1687 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1688 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1689 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1690 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1692 snew(chains[i].pdba, 1);
1693 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1694 snew(chains[i].x, chains[i].pdba->nr);
1695 for (j = 0; j < chains[i].pdba->nr; j++)
1697 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1698 snew(chains[i].pdba->atomname[j], 1);
1699 *chains[i].pdba->atomname[j] =
1700 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1701 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1702 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1704 /* Re-index the residues assuming that the indices are continuous */
1705 k = chains[i].pdba->atom[0].resind;
1706 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1707 chains[i].pdba->nres = nres;
1708 for (j = 0; j < chains[i].pdba->nr; j++)
1710 chains[i].pdba->atom[j].resind -= k;
1712 srenew(chains[i].pdba->resinfo, nres);
1713 for (j = 0; j < nres; j++)
1715 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1716 snew(chains[i].pdba->resinfo[j].name, 1);
1717 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1718 /* make all chain identifiers equal to that of the chain */
1719 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1723 if (nchainmerges > 0)
1725 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1729 printf("There are %d chains and %d blocks of water and "
1730 "%d residues with %d atoms\n",
1731 nch-nwaterchain, nwaterchain,
1732 pdba_all.nres, natom);
1734 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1735 for (i = 0; (i < nch); i++)
1737 printf(" %d '%c' %5d %6d %s\n",
1738 i+1, chains[i].chainid ? chains[i].chainid : '-',
1739 chains[i].pdba->nres, chains[i].pdba->nr,
1740 chains[i].bAllWat ? "(only water)" : "");
1744 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1746 /* Read atomtypes... */
1747 atype = read_atype(ffdir, &symtab);
1749 /* read residue database */
1750 printf("Reading residue database... (%s)\n", forcefield);
1751 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1754 for (i = 0; i < nrtpf; i++)
1756 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1762 /* Not correct with multiple rtp input files with different bonded types */
1763 fp = gmx_fio_fopen("new.rtp", "w");
1764 print_resall(fp, nrtp, restp, atype);
1768 /* read hydrogen database */
1769 nah = read_h_db(ffdir, &ah);
1771 /* Read Termini database... */
1772 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1773 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1775 top_fn = ftp2fn(efTOP, NFILE, fnm);
1776 top_file = gmx_fio_fopen(top_fn, "w");
1778 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1785 for (chain = 0; (chain < nch); chain++)
1787 cc = &(chains[chain]);
1789 /* set pdba, natom and nres to the current chain */
1792 natom = cc->pdba->nr;
1793 nres = cc->pdba->nres;
1795 if (cc->chainid && ( cc->chainid != ' ' ) )
1797 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1798 chain+1, cc->chainid, natom, nres);
1802 printf("Processing chain %d (%d atoms, %d residues)\n",
1803 chain+1, natom, nres);
1806 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1807 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1808 nrtprename, rtprename);
1810 cc->chainstart[cc->nterpairs] = pdba->nres;
1812 for (i = 0; i < cc->nterpairs; i++)
1814 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1815 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1817 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1823 if (cc->nterpairs == 0)
1825 printf("Problem with chain definition, or missing terminal residues.\n"
1826 "This chain does not appear to contain a recognized chain molecule.\n"
1827 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1830 /* Check for disulfides and other special bonds */
1831 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1835 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1843 sprintf(fn, "chain.pdb");
1847 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1849 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1853 for (i = 0; i < cc->nterpairs; i++)
1857 * We first apply a filter so we only have the
1858 * termini that can be applied to the residue in question
1859 * (or a generic terminus if no-residue specific is available).
1861 /* First the N terminus */
1864 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1865 *pdba->resinfo[cc->r_start[i]].name,
1866 *pdba->resinfo[cc->r_start[i]].rtp,
1870 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1871 "is already in a terminus-specific form and skipping terminus selection.\n");
1876 if (bTerMan && ntdblist > 1)
1878 sprintf(select, "Select start terminus type for %s-%d",
1879 *pdba->resinfo[cc->r_start[i]].name,
1880 pdba->resinfo[cc->r_start[i]].nr);
1881 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1885 cc->ntdb[i] = tdblist[0];
1888 printf("Start terminus %s-%d: %s\n",
1889 *pdba->resinfo[cc->r_start[i]].name,
1890 pdba->resinfo[cc->r_start[i]].nr,
1891 (cc->ntdb[i])->name);
1900 /* And the C terminus */
1903 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1904 *pdba->resinfo[cc->r_end[i]].name,
1905 *pdba->resinfo[cc->r_end[i]].rtp,
1909 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1910 "is already in a terminus-specific form and skipping terminus selection.\n");
1915 if (bTerMan && ntdblist > 1)
1917 sprintf(select, "Select end terminus type for %s-%d",
1918 *pdba->resinfo[cc->r_end[i]].name,
1919 pdba->resinfo[cc->r_end[i]].nr);
1920 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1924 cc->ctdb[i] = tdblist[0];
1926 printf("End terminus %s-%d: %s\n",
1927 *pdba->resinfo[cc->r_end[i]].name,
1928 pdba->resinfo[cc->r_end[i]].nr,
1929 (cc->ctdb[i])->name);
1938 /* lookup hackblocks and rtp for all residues */
1939 get_hackblocks_rtp(&hb_chain, &restp_chain,
1940 nrtp, restp, pdba->nres, pdba->resinfo,
1941 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1942 /* ideally, now we would not need the rtp itself anymore, but do
1943 everything using the hb and restp arrays. Unfortunately, that
1944 requires some re-thinking of code in gen_vsite.c, which I won't
1945 do now :( AF 26-7-99 */
1947 rename_atoms(NULL, ffdir,
1948 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1950 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1954 block = new_blocka();
1956 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1957 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1958 if (ftp2bSet(efNDX, NFILE, fnm))
1962 fprintf(stderr, "WARNING: with the -remh option the generated "
1963 "index file (%s) might be useless\n"
1964 "(the index file is generated before hydrogens are added)",
1965 ftp2fn(efNDX, NFILE, fnm));
1967 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1969 for (i = 0; i < block->nr; i++)
1978 fprintf(stderr, "WARNING: "
1979 "without sorting no check for duplicate atoms can be done\n");
1982 /* Generate Hydrogen atoms (and termini) in the sequence */
1983 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1984 natom = add_h(&pdba, &x, nah, ah,
1985 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1986 NULL, NULL, TRUE, FALSE);
1987 printf("Now there are %d residues with %d atoms\n",
1988 pdba->nres, pdba->nr);
1991 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1996 for (i = 0; (i < natom); i++)
1998 fprintf(debug, "Res %s%d atom %d %s\n",
1999 *(pdba->resinfo[pdba->atom[i].resind].name),
2000 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
2004 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
2006 /* make up molecule name(s) */
2008 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2010 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2016 sprintf(molname, "Water");
2020 this_chainid = cc->chainid;
2022 /* Add the chain id if we have one */
2023 if (this_chainid != ' ')
2025 sprintf(buf, "_chain_%c", this_chainid);
2026 strcat(suffix, buf);
2029 /* Check if there have been previous chains with the same id */
2031 for (k = 0; k < chain; k++)
2033 if (cc->chainid == chains[k].chainid)
2038 /* Add the number for this chain identifier if there are multiple copies */
2042 sprintf(buf, "%d", nid_used+1);
2043 strcat(suffix, buf);
2046 if (strlen(suffix) > 0)
2048 sprintf(molname, "%s%s", p_restype, suffix);
2052 strcpy(molname, p_restype);
2056 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2059 strcpy(itp_fn, top_fn);
2060 printf("Chain time...\n");
2061 c = strrchr(itp_fn, '.');
2062 sprintf(c, "_%s.itp", molname);
2063 c = strrchr(posre_fn, '.');
2064 sprintf(c, "_%s.itp", molname);
2065 if (strcmp(itp_fn, posre_fn) == 0)
2067 strcpy(buf_fn, posre_fn);
2068 c = strrchr(buf_fn, '.');
2070 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2074 srenew(incls, nincl);
2075 incls[nincl-1] = gmx_strdup(itp_fn);
2076 itp_file = gmx_fio_fopen(itp_fn, "w");
2083 srenew(mols, nmol+1);
2086 mols[nmol].name = gmx_strdup("SOL");
2087 mols[nmol].nr = pdba->nres;
2091 mols[nmol].name = gmx_strdup(molname);
2098 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2108 top_file2 = itp_file;
2112 top_file2 = top_file;
2115 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2117 restp_chain, hb_chain,
2119 bVsites, bVsiteAromatics, ffdir,
2120 mHmult, nssbonds, ssbonds,
2121 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2122 bRenumRes, bRTPresname);
2126 write_posres(posre_fn, pdba, posre_fc);
2131 gmx_fio_fclose(itp_file);
2134 /* pdba and natom have been reassigned somewhere so: */
2140 if (cc->chainid == ' ')
2142 sprintf(fn, "chain.pdb");
2146 sprintf(fn, "chain_%c.pdb", cc->chainid);
2148 cool_quote(quote, 255, NULL);
2149 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2153 if (watermodel == NULL)
2155 for (chain = 0; chain < nch; chain++)
2157 if (chains[chain].bAllWat)
2159 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2165 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2166 if (!fflib_fexist(buf_fn))
2168 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2169 buf_fn, watermodel);
2173 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2174 gmx_fio_fclose(top_file);
2176 gmx_residuetype_destroy(rt);
2178 /* now merge all chains back together */
2181 for (i = 0; (i < nch); i++)
2183 natom += chains[i].pdba->nr;
2184 nres += chains[i].pdba->nres;
2187 init_t_atoms(atoms, natom, FALSE);
2188 for (i = 0; i < atoms->nres; i++)
2190 sfree(atoms->resinfo[i].name);
2192 sfree(atoms->resinfo);
2194 snew(atoms->resinfo, nres);
2198 for (i = 0; (i < nch); i++)
2202 printf("Including chain %d in system: %d atoms %d residues\n",
2203 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2205 for (j = 0; (j < chains[i].pdba->nr); j++)
2207 atoms->atom[k] = chains[i].pdba->atom[j];
2208 atoms->atom[k].resind += l; /* l is processed nr of residues */
2209 atoms->atomname[k] = chains[i].pdba->atomname[j];
2210 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2211 copy_rvec(chains[i].x[j], x[k]);
2214 for (j = 0; (j < chains[i].pdba->nres); j++)
2216 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2219 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2227 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2228 print_sums(atoms, TRUE);
2231 fprintf(stderr, "\nWriting coordinate file...\n");
2232 clear_rvec(box_space);
2235 make_new_box(atoms->nr, x, box, box_space, FALSE);
2237 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2239 printf("\t\t--------- PLEASE NOTE ------------\n");
2240 printf("You have successfully generated a topology from: %s.\n",
2241 opt2fn("-f", NFILE, fnm));
2242 if (watermodel != NULL)
2244 printf("The %s force field and the %s water model are used.\n",
2245 ffname, watermodel);
2249 printf("The %s force field is used.\n",
2252 printf("\t\t--------- ETON ESAELP ------------\n");