2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/gmxfio.h"
49 #include "gromacs/fileio/pdbio.h"
50 #include "gromacs/fileio/strdb.h"
51 #include "gromacs/gmxlib/conformation-utilities.h"
52 #include "gromacs/gmxpreprocess/fflibutil.h"
53 #include "gromacs/gmxpreprocess/genhydro.h"
54 #include "gromacs/gmxpreprocess/h_db.h"
55 #include "gromacs/gmxpreprocess/hizzie.h"
56 #include "gromacs/gmxpreprocess/pdb2top.h"
57 #include "gromacs/gmxpreprocess/pgutil.h"
58 #include "gromacs/gmxpreprocess/resall.h"
59 #include "gromacs/gmxpreprocess/specbond.h"
60 #include "gromacs/gmxpreprocess/ter_db.h"
61 #include "gromacs/gmxpreprocess/toputil.h"
62 #include "gromacs/gmxpreprocess/xlate.h"
63 #include "gromacs/legacyheaders/copyrite.h"
64 #include "gromacs/legacyheaders/macros.h"
65 #include "gromacs/legacyheaders/readinp.h"
66 #include "gromacs/legacyheaders/typedefs.h"
67 #include "gromacs/math/vec.h"
68 #include "gromacs/topology/atomprop.h"
69 #include "gromacs/topology/block.h"
70 #include "gromacs/topology/index.h"
71 #include "gromacs/topology/residuetypes.h"
72 #include "gromacs/topology/symtab.h"
73 #include "gromacs/utility/cstringutil.h"
74 #include "gromacs/utility/dir_separator.h"
75 #include "gromacs/utility/fatalerror.h"
76 #include "gromacs/utility/gmxassert.h"
77 #include "gromacs/utility/smalloc.h"
81 char gmx[RTP_MAXCHAR+2];
82 char main[RTP_MAXCHAR+2];
83 char nter[RTP_MAXCHAR+2];
84 char cter[RTP_MAXCHAR+2];
85 char bter[RTP_MAXCHAR+2];
89 static const char *res2bb_notermini(const char *name,
90 int nrr, const rtprename_t *rr)
92 /* NOTE: This function returns the main building block name,
93 * it does not take terminal renaming into account.
98 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
103 return (i < nrr ? rr[i].main : name);
106 static const char *select_res(int nr, int resnr,
107 const char *name[], const char *expl[],
109 int nrr, const rtprename_t *rr)
113 printf("Which %s type do you want for residue %d\n", title, resnr+1);
114 for (sel = 0; (sel < nr); sel++)
116 printf("%d. %s (%s)\n",
117 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
119 printf("\nType a number:"); fflush(stdout);
121 if (scanf("%d", &sel) != 1)
123 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
129 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
134 const char *lh[easpNR] = { "ASP", "ASPH" };
135 const char *expl[easpNR] = {
136 "Not protonated (charge -1)",
137 "Protonated (charge 0)"
140 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
143 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
148 const char *lh[egluNR] = { "GLU", "GLUH" };
149 const char *expl[egluNR] = {
150 "Not protonated (charge -1)",
151 "Protonated (charge 0)"
154 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
157 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
162 const char *lh[eglnNR] = { "GLN", "QLN" };
163 const char *expl[eglnNR] = {
164 "Not protonated (charge 0)",
165 "Protonated (charge +1)"
168 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
171 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
176 const char *lh[elysNR] = { "LYSN", "LYS" };
177 const char *expl[elysNR] = {
178 "Not protonated (charge 0)",
179 "Protonated (charge +1)"
182 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
185 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
190 const char *lh[eargNR] = { "ARGN", "ARG" };
191 const char *expl[eargNR] = {
192 "Not protonated (charge 0)",
193 "Protonated (charge +1)"
196 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
199 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
201 const char *expl[ehisNR] = {
208 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
211 static void read_rtprename(const char *fname, FILE *fp,
212 int *nrtprename, rtprename_t **rtprename)
214 char line[STRLEN], buf[STRLEN];
223 while (get_a_line(fp, line, STRLEN))
226 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
227 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
228 * Note that the buffer length has been increased to 7 to allow this,
229 * so we just need to make sure the strings have zero-length initially.
232 rr[n].main[0] = '\0';
233 rr[n].nter[0] = '\0';
234 rr[n].cter[0] = '\0';
235 rr[n].bter[0] = '\0';
236 nc = sscanf(line, "%6s %6s %6s %6s %6s %s",
237 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
238 if (strlen(rr[n].gmx) > RTP_MAXCHAR || strlen(rr[n].main) > RTP_MAXCHAR ||
239 strlen(rr[n].nter) > RTP_MAXCHAR || strlen(rr[n].cter) > RTP_MAXCHAR || strlen(rr[n].bter) > RTP_MAXCHAR)
241 gmx_fatal(FARGS, "Residue renaming database '%s' has strings longer than %d chars in first 5 columns:\n%s",
242 fname, RTP_MAXCHAR, line);
247 if (nc != 2 && nc != 5)
249 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
255 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
260 /* This file does not have special termini names, copy them from main */
261 strcpy(rr[n].nter, rr[n].main);
262 strcpy(rr[n].cter, rr[n].main);
263 strcpy(rr[n].bter, rr[n].main);
273 static char *search_resrename(int nrr, rtprename_t *rr,
275 gmx_bool bStart, gmx_bool bEnd,
276 gmx_bool bCompareFFRTPname)
284 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
285 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
290 /* If found in the database, rename this residue's rtp building block,
291 * otherwise keep the old name.
314 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
322 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
323 int nrr, rtprename_t *rr, t_symtab *symtab,
327 gmx_bool bStart, bEnd;
329 gmx_bool bFFRTPTERRNM;
331 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
333 for (r = 0; r < pdba->nres; r++)
337 for (j = 0; j < nterpairs; j++)
344 for (j = 0; j < nterpairs; j++)
352 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
354 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
356 /* This is a terminal residue, but the residue name,
357 * currently stored in .rtp, is not a standard residue name,
358 * but probably a force field specific rtp name.
359 * Check if we need to rename it because it is terminal.
361 nn = search_resrename(nrr, rr,
362 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
365 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
369 printf("Changing rtp entry of residue %d %s to '%s'\n",
370 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
372 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
377 static void pdbres_to_gmxrtp(t_atoms *pdba)
381 for (i = 0; (i < pdba->nres); i++)
383 if (pdba->resinfo[i].rtp == NULL)
385 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
390 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
391 gmx_bool bFullCompare, t_symtab *symtab)
396 for (i = 0; (i < pdba->nres); i++)
398 resnm = *pdba->resinfo[i].name;
399 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
400 (!bFullCompare && strstr(resnm, oldnm) != NULL))
402 /* Rename the residue name (not the rtp name) */
403 pdba->resinfo[i].name = put_symtab(symtab, newnm);
408 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
409 gmx_bool bFullCompare, t_symtab *symtab)
414 for (i = 0; (i < pdba->nres); i++)
416 /* We have not set the rtp name yes, use the residue name */
417 bbnm = *pdba->resinfo[i].name;
418 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
419 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
421 /* Change the rtp builing block name */
422 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
427 static void rename_bbint(t_atoms *pdba, const char *oldnm,
428 const char *gettp(int, int, const rtprename_t *),
429 gmx_bool bFullCompare,
431 int nrr, const rtprename_t *rr)
437 for (i = 0; i < pdba->nres; i++)
439 /* We have not set the rtp name yes, use the residue name */
440 bbnm = *pdba->resinfo[i].name;
441 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
442 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
444 ptr = gettp(i, nrr, rr);
445 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
450 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
456 ftp = fn2ftp(filename);
457 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
459 fprintf(stderr, "No occupancies in %s\n", filename);
463 for (i = 0; (i < atoms->nr); i++)
465 if (atoms->pdbinfo[i].occup != 1)
469 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
470 *atoms->resinfo[atoms->atom[i].resind].name,
471 atoms->resinfo[ atoms->atom[i].resind].nr,
473 atoms->pdbinfo[i].occup);
475 if (atoms->pdbinfo[i].occup == 0)
485 if (nzero == atoms->nr)
487 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
489 else if ((nzero > 0) || (nnotone > 0))
493 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
494 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
496 nzero, nnotone, atoms->nr);
500 fprintf(stderr, "All occupancies are one\n");
505 void write_posres(char *fn, t_atoms *pdba, real fc)
510 fp = gmx_fio_fopen(fn, "w");
512 "; In this topology include file, you will find position restraint\n"
513 "; entries for all the heavy atoms in your original pdb file.\n"
514 "; This means that all the protons which were added by pdb2gmx are\n"
515 "; not restrained.\n"
517 "[ position_restraints ]\n"
518 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
520 for (i = 0; (i < pdba->nr); i++)
522 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
524 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
530 static int read_pdball(const char *inf, const char *outf, char *title,
531 t_atoms *atoms, rvec **x,
532 int *ePBC, matrix box, gmx_bool bRemoveH,
533 t_symtab *symtab, gmx_residuetype_t *rt, const char *watres,
534 gmx_atomprop_t aps, gmx_bool bVerbose)
535 /* Read a pdb file. (containing proteins) */
537 int natom, new_natom, i;
540 printf("Reading %s...\n", inf);
541 get_stx_coordnum(inf, &natom);
542 init_t_atoms(atoms, natom, TRUE);
544 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
545 if (fn2ftp(inf) == efPDB)
547 get_pdb_atomnumber(atoms, aps);
552 for (i = 0; i < atoms->nr; i++)
554 if (!is_hydrogen(*atoms->atomname[i]))
556 atoms->atom[new_natom] = atoms->atom[i];
557 atoms->atomname[new_natom] = atoms->atomname[i];
558 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
559 copy_rvec((*x)[i], (*x)[new_natom]);
563 atoms->nr = new_natom;
568 if (title && title[0])
570 printf(" '%s',", title);
572 printf(" %d atoms\n", natom);
574 /* Rename residues */
575 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
576 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
577 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
579 rename_atoms("xlateat.dat", NULL,
580 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
589 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
595 void process_chain(t_atoms *pdba, rvec *x,
596 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
597 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
598 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
599 real angle, real distance, t_symtab *symtab,
600 int nrr, const rtprename_t *rr)
602 /* Rename aromatics, lys, asp and histidine */
605 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
609 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
613 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
617 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
621 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
625 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
629 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
633 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
637 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
641 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
646 set_histp(pdba, x, angle, distance);
650 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
653 /* Initialize the rtp builing block names with the residue names
654 * for the residues that have not been processed above.
656 pdbres_to_gmxrtp(pdba);
658 /* Now we have all rtp names set.
659 * The rtp names will conform to Gromacs naming,
660 * unless the input pdb file contained one or more force field specific
661 * rtp names as residue names.
665 /* struct for sorting the atoms from the pdb file */
667 int resnr; /* residue number */
668 int j; /* database order index */
669 int index; /* original atom number */
670 char anm1; /* second letter of atom name */
671 char altloc; /* alternate location indicator */
674 int pdbicomp(const void *a, const void *b)
679 pa = (t_pdbindex *)a;
680 pb = (t_pdbindex *)b;
682 d = (pa->resnr - pb->resnr);
688 d = (pa->anm1 - pb->anm1);
691 d = (pa->altloc - pb->altloc);
699 static void sort_pdbatoms(t_restp restp[],
700 int natoms, t_atoms **pdbaptr, rvec **x,
701 t_blocka *block, char ***gnames)
703 t_atoms *pdba, *pdbnew;
717 for (i = 0; i < natoms; i++)
719 atomnm = *pdba->atomname[i];
720 rptr = &restp[pdba->atom[i].resind];
721 for (j = 0; (j < rptr->natom); j++)
723 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
728 if (j == rptr->natom)
733 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
734 "while sorting atoms.\n%s", atomnm,
735 *pdba->resinfo[pdba->atom[i].resind].name,
736 pdba->resinfo[pdba->atom[i].resind].nr,
739 is_hydrogen(atomnm) ?
740 "\nFor a hydrogen, this can be a different protonation state, or it\n"
741 "might have had a different number in the PDB file and was rebuilt\n"
742 "(it might for instance have been H3, and we only expected H1 & H2).\n"
743 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
744 "Remove this hydrogen or choose a different protonation state to solve it.\n"
745 "Option -ignh will ignore all hydrogens in the input." : ".");
746 gmx_fatal(FARGS, buf);
748 /* make shadow array to be sorted into indexgroup */
749 pdbi[i].resnr = pdba->atom[i].resind;
752 pdbi[i].anm1 = atomnm[1];
753 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
755 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
757 /* pdba is sorted in pdbnew using the pdbi index */
760 init_t_atoms(pdbnew, natoms, TRUE);
762 pdbnew->nr = pdba->nr;
763 pdbnew->nres = pdba->nres;
764 sfree(pdbnew->resinfo);
765 pdbnew->resinfo = pdba->resinfo;
766 for (i = 0; i < natoms; i++)
768 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
769 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
770 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
771 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
772 /* make indexgroup in block */
773 a[i] = pdbi[i].index;
776 sfree(pdba->atomname);
778 sfree(pdba->pdbinfo);
781 /* copy the sorted pdbnew back to pdba */
784 add_grp(block, gnames, natoms, a, "prot_sort");
790 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
792 int i, j, oldnatoms, ndel;
795 printf("Checking for duplicate atoms....\n");
796 oldnatoms = pdba->nr;
798 /* NOTE: pdba->nr is modified inside the loop */
799 for (i = 1; (i < pdba->nr); i++)
801 /* compare 'i' and 'i-1', throw away 'i' if they are identical
802 this is a 'while' because multiple alternate locations can be present */
803 while ( (i < pdba->nr) &&
804 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
805 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
810 ri = &pdba->resinfo[pdba->atom[i].resind];
811 printf("deleting duplicate atom %4s %s%4d%c",
812 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
813 if (ri->chainid && (ri->chainid != ' '))
815 printf(" ch %c", ri->chainid);
819 if (pdba->pdbinfo[i].atomnr)
821 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
823 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
825 printf(" altloc %c", pdba->pdbinfo[i].altloc);
831 /* We can not free, since it might be in the symtab */
832 /* sfree(pdba->atomname[i]); */
833 for (j = i; j < pdba->nr; j++)
835 pdba->atom[j] = pdba->atom[j+1];
836 pdba->atomname[j] = pdba->atomname[j+1];
839 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
841 copy_rvec(x[j+1], x[j]);
843 srenew(pdba->atom, pdba->nr);
844 /* srenew(pdba->atomname, pdba->nr); */
845 srenew(pdba->pdbinfo, pdba->nr);
848 if (pdba->nr != oldnatoms)
850 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
856 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
857 gmx_residuetype_t *rt)
860 const char *p_startrestype;
861 const char *p_restype;
862 int nstartwarn, nendwarn;
870 /* Find the starting terminus (typially N or 5') */
871 for (i = r0; i < r1 && *r_start == -1; i++)
873 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
874 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
876 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
883 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
887 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
895 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
896 for (i = *r_start; i < r1; i++)
898 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
899 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
907 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
908 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
909 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
913 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
922 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
936 static SplittingType getSplittingType(const char *chainsep)
938 SplittingType splitting = SPLIT_TER_ONLY; /* keep compiler happy */
940 /* Be a bit flexible to catch typos */
941 if (!strncmp(chainsep, "id_o", 4))
943 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
944 splitting = SPLIT_ID_OR_TER;
945 printf("Splitting chemical chains based on TER records or chain id changing.\n");
947 else if (!strncmp(chainsep, "int", 3))
949 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
950 splitting = SPLIT_INTERACTIVE;
951 printf("Splitting chemical chains interactively.\n");
953 else if (!strncmp(chainsep, "id_a", 4))
955 splitting = SPLIT_ID_AND_TER;
956 printf("Splitting chemical chains based on TER records and chain id changing.\n");
958 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
960 splitting = SPLIT_ID_ONLY;
961 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
963 else if (chainsep[0] == 't')
965 splitting = SPLIT_TER_ONLY;
966 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
970 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
976 modify_chain_numbers(t_atoms * pdba,
977 const char * chainsep)
980 char old_prev_chainid;
981 char old_this_chainid;
982 int old_prev_chainnum;
983 int old_this_chainnum;
989 const char * prev_atomname;
990 const char * this_atomname;
991 const char * prev_resname;
992 const char * this_resname;
998 SplittingType splitting = getSplittingType(chainsep);
1000 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
1002 old_prev_chainid = '?';
1003 old_prev_chainnum = -1;
1006 this_atomname = NULL;
1008 this_resname = NULL;
1012 for (i = 0; i < pdba->nres; i++)
1014 ri = &pdba->resinfo[i];
1015 old_this_chainid = ri->chainid;
1016 old_this_chainnum = ri->chainnum;
1018 prev_atomname = this_atomname;
1019 prev_atomnum = this_atomnum;
1020 prev_resname = this_resname;
1021 prev_resnum = this_resnum;
1022 prev_chainid = this_chainid;
1024 this_atomname = *(pdba->atomname[i]);
1025 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1026 this_resname = *ri->name;
1027 this_resnum = ri->nr;
1028 this_chainid = ri->chainid;
1032 case SPLIT_ID_OR_TER:
1033 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1039 case SPLIT_ID_AND_TER:
1040 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1047 if (old_this_chainid != old_prev_chainid)
1053 case SPLIT_TER_ONLY:
1054 if (old_this_chainnum != old_prev_chainnum)
1059 case SPLIT_INTERACTIVE:
1060 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1064 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1065 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1066 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1067 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1069 if (NULL == fgets(select, STRLEN-1, stdin))
1071 gmx_fatal(FARGS, "Error reading from stdin");
1074 if (i == 0 || select[0] == 'y')
1081 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1084 old_prev_chainid = old_this_chainid;
1085 old_prev_chainnum = old_this_chainnum;
1087 ri->chainnum = new_chainnum;
1116 int gmx_pdb2gmx(int argc, char *argv[])
1118 const char *desc[] = {
1119 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1120 "some database files, adds hydrogens to the molecules and generates",
1121 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1122 "and a topology in GROMACS format.",
1123 "These files can subsequently be processed to generate a run input file.",
1125 "[THISMODULE] will search for force fields by looking for",
1126 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1127 "of the current working directory and of the GROMACS library directory",
1128 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1130 "By default the forcefield selection is interactive,",
1131 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1132 "in the list on the command line instead. In that case [THISMODULE] just looks",
1133 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1135 "After choosing a force field, all files will be read only from",
1136 "the corresponding force field directory.",
1137 "If you want to modify or add a residue types, you can copy the force",
1138 "field directory from the GROMACS library directory to your current",
1139 "working directory. If you want to add new protein residue types,",
1140 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1141 "or copy the whole library directory to a local directory and set",
1142 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1143 "Check Chapter 5 of the manual for more information about file formats.",
1146 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1147 "need not necessarily contain a protein structure. Every kind of",
1148 "molecule for which there is support in the database can be converted.",
1149 "If there is no support in the database, you can add it yourself.[PAR]",
1151 "The program has limited intelligence, it reads a number of database",
1152 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1153 "if necessary this can be done manually. The program can prompt the",
1154 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1155 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1156 "protonated (three protons, default), for Asp and Glu unprotonated",
1157 "(default) or protonated, for His the proton can be either on ND1,",
1158 "on NE2 or on both. By default these selections are done automatically.",
1159 "For His, this is based on an optimal hydrogen bonding",
1160 "conformation. Hydrogen bonds are defined based on a simple geometric",
1161 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1162 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1163 "[TT]-dist[tt] respectively.[PAR]",
1165 "The protonation state of N- and C-termini can be chosen interactively",
1166 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1167 "respectively. Some force fields support zwitterionic forms for chains of",
1168 "one residue, but for polypeptides these options should NOT be selected.",
1169 "The AMBER force fields have unique forms for the terminal residues,",
1170 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1171 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1172 "respectively to use these forms, making sure you preserve the format",
1173 "of the coordinate file. Alternatively, use named terminating residues",
1174 "(e.g. ACE, NME).[PAR]",
1176 "The separation of chains is not entirely trivial since the markup",
1177 "in user-generated PDB files frequently varies and sometimes it",
1178 "is desirable to merge entries across a TER record, for instance",
1179 "if you want a disulfide bridge or distance restraints between",
1180 "two protein chains or if you have a HEME group bound to a protein.",
1181 "In such cases multiple chains should be contained in a single",
1182 "[TT]moleculetype[tt] definition.",
1183 "To handle this, [THISMODULE] uses two separate options.",
1184 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1185 "start, and termini added when applicable. This can be done based on the",
1186 "existence of TER records, when the chain id changes, or combinations of either",
1187 "or both of these. You can also do the selection fully interactively.",
1188 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1189 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1190 "This can be turned off (no merging), all non-water chains can be merged into a",
1191 "single molecule, or the selection can be done interactively.[PAR]",
1193 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1194 "If any of the occupancies are not one, indicating that the atom is",
1195 "not resolved well in the structure, a warning message is issued.",
1196 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1197 "all occupancy fields may be zero. Either way, it is up to the user",
1198 "to verify the correctness of the input data (read the article!).[PAR]",
1200 "During processing the atoms will be reordered according to GROMACS",
1201 "conventions. With [TT]-n[tt] an index file can be generated that",
1202 "contains one group reordered in the same way. This allows you to",
1203 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1204 "one limitation: reordering is done after the hydrogens are stripped",
1205 "from the input and before new hydrogens are added. This means that",
1206 "you should not use [TT]-ignh[tt].[PAR]",
1208 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1209 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1210 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1213 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1214 "motions. Angular and out-of-plane motions can be removed by changing",
1215 "hydrogens into virtual sites and fixing angles, which fixes their",
1216 "position relative to neighboring atoms. Additionally, all atoms in the",
1217 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1218 "can be converted into virtual sites, eliminating the fast improper dihedral",
1219 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1220 "atoms are also converted to virtual sites. The mass of all atoms that are",
1221 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1222 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1223 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1224 "done for water hydrogens to slow down the rotational motion of water.",
1225 "The increase in mass of the hydrogens is subtracted from the bonded",
1226 "(heavy) atom so that the total mass of the system remains the same."
1230 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1232 t_atoms pdba_all, *pdba;
1236 int chain, nch, maxch, nwaterchain;
1238 t_chain *chains, *cc;
1239 char select[STRLEN];
1247 int i, j, k, l, nrtp;
1248 int *swap_index, si;
1252 gpp_atomtype_t atype;
1253 gmx_residuetype_t*rt;
1255 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1256 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1257 char *c, forcefield[STRLEN], ffdir[STRLEN];
1258 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1266 rtprename_t *rtprename = NULL;
1267 int nah, nNtdb, nCtdb, ntdblist;
1268 t_hackblock *ntdb, *ctdb, **tdblist;
1272 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE;
1274 t_hackblock *hb_chain;
1275 t_restp *restp_chain;
1277 const char *p_restype;
1281 const char * prev_atomname;
1282 const char * this_atomname;
1283 const char * prev_resname;
1284 const char * this_resname;
1289 int prev_chainnumber;
1290 int this_chainnumber;
1292 int this_chainstart;
1293 int prev_chainstart;
1300 { efSTX, "-f", "eiwit.pdb", ffREAD },
1301 { efSTO, "-o", "conf", ffWRITE },
1302 { efTOP, NULL, NULL, ffWRITE },
1303 { efITP, "-i", "posre", ffWRITE },
1304 { efNDX, "-n", "clean", ffOPTWR },
1305 { efSTO, "-q", "clean.pdb", ffOPTWR }
1307 #define NFILE asize(fnm)
1310 /* Command line arguments must be static */
1311 static gmx_bool bNewRTP = FALSE;
1312 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1313 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1314 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1315 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1316 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1317 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1318 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1319 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1320 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1321 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1322 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1323 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1324 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1325 static const char *ff = "select";
1328 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1329 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1330 { "-lb", FALSE, etREAL, {&long_bond_dist},
1331 "HIDDENLong bond warning distance" },
1332 { "-sb", FALSE, etREAL, {&short_bond_dist},
1333 "HIDDENShort bond warning distance" },
1334 { "-chainsep", FALSE, etENUM, {chainsep},
1335 "Condition in PDB files when a new chain should be started (adding termini)" },
1336 { "-merge", FALSE, etENUM, {&merge},
1337 "Merge multiple chains into a single [moleculetype]" },
1338 { "-ff", FALSE, etSTR, {&ff},
1339 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1340 { "-water", FALSE, etENUM, {watstr},
1341 "Water model to use" },
1342 { "-inter", FALSE, etBOOL, {&bInter},
1343 "Set the next 8 options to interactive"},
1344 { "-ss", FALSE, etBOOL, {&bCysMan},
1345 "Interactive SS bridge selection" },
1346 { "-ter", FALSE, etBOOL, {&bTerMan},
1347 "Interactive termini selection, instead of charged (default)" },
1348 { "-lys", FALSE, etBOOL, {&bLysMan},
1349 "Interactive lysine selection, instead of charged" },
1350 { "-arg", FALSE, etBOOL, {&bArgMan},
1351 "Interactive arginine selection, instead of charged" },
1352 { "-asp", FALSE, etBOOL, {&bAspMan},
1353 "Interactive aspartic acid selection, instead of charged" },
1354 { "-glu", FALSE, etBOOL, {&bGluMan},
1355 "Interactive glutamic acid selection, instead of charged" },
1356 { "-gln", FALSE, etBOOL, {&bGlnMan},
1357 "Interactive glutamine selection, instead of neutral" },
1358 { "-his", FALSE, etBOOL, {&bHisMan},
1359 "Interactive histidine selection, instead of checking H-bonds" },
1360 { "-angle", FALSE, etREAL, {&angle},
1361 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1362 { "-dist", FALSE, etREAL, {&distance},
1363 "Maximum donor-acceptor distance for a H-bond (nm)" },
1364 { "-una", FALSE, etBOOL, {&bUnA},
1365 "Select aromatic rings with united CH atoms on phenylalanine, "
1366 "tryptophane and tyrosine" },
1367 { "-sort", FALSE, etBOOL, {&bSort},
1368 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1369 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1370 "Ignore hydrogen atoms that are in the coordinate file" },
1371 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1372 "Continue when atoms are missing, dangerous" },
1373 { "-v", FALSE, etBOOL, {&bVerbose},
1374 "Be slightly more verbose in messages" },
1375 { "-posrefc", FALSE, etREAL, {&posre_fc},
1376 "Force constant for position restraints" },
1377 { "-vsite", FALSE, etENUM, {vsitestr},
1378 "Convert atoms to virtual sites" },
1379 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1380 "Make hydrogen atoms heavy" },
1381 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1382 "Change the mass of hydrogens to 2 amu" },
1383 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1384 "Use charge groups in the [REF].rtp[ref] file" },
1385 { "-cmap", TRUE, etBOOL, {&bCmap},
1386 "Use cmap torsions (if enabled in the [REF].rtp[ref] file)" },
1387 { "-renum", TRUE, etBOOL, {&bRenumRes},
1388 "Renumber the residues consecutively in the output" },
1389 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1390 "Use [REF].rtp[ref] entry names as residue names" }
1392 #define NPARGS asize(pa)
1394 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1400 /* Force field selection, interactive or direct */
1401 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1402 forcefield, sizeof(forcefield),
1403 ffdir, sizeof(ffdir));
1405 if (strlen(forcefield) > 0)
1407 strcpy(ffname, forcefield);
1408 ffname[0] = toupper(ffname[0]);
1412 gmx_fatal(FARGS, "Empty forcefield string");
1415 printf("\nUsing the %s force field in directory %s\n\n",
1418 choose_watermodel(watstr[0], ffdir, &watermodel);
1422 /* if anything changes here, also change description of -inter */
1437 else if (bDeuterate)
1446 /* parse_common_args ensures vsitestr has been selected, but
1447 clang-static-analyzer needs clues to know that */
1448 GMX_ASSERT(vsitestr[0], "-vsite default wasn't processed correctly");
1449 switch (vsitestr[0][0])
1451 case 'n': /* none */
1453 bVsiteAromatics = FALSE;
1455 case 'h': /* hydrogens */
1457 bVsiteAromatics = FALSE;
1459 case 'a': /* aromatics */
1461 bVsiteAromatics = TRUE;
1464 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1465 __FILE__, __LINE__, vsitestr[0]);
1468 /* Open the symbol table */
1469 open_symtab(&symtab);
1471 /* Residue type database */
1472 gmx_residuetype_init(&rt);
1474 /* Read residue renaming database(s), if present */
1475 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1479 for (i = 0; i < nrrn; i++)
1481 fp = fflib_open(rrn[i]);
1482 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1488 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1489 for (i = 0; i < nrtprename; i++)
1491 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1493 /* Only add names if the 'standard' gromacs/iupac base name was found */
1496 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1497 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1498 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1499 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1504 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1505 strstr(watermodel, "4P")))
1509 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1510 strstr(watermodel, "5P")))
1519 aps = gmx_atomprop_init();
1520 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1521 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1526 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1529 printf("Analyzing pdb file\n");
1532 modify_chain_numbers(&pdba_all, chainsep[0]);
1536 this_atomname = NULL;
1538 this_resname = NULL;
1541 this_chainnumber = -1;
1542 this_chainstart = 0;
1543 /* Keep the compiler happy */
1544 prev_chainstart = 0;
1548 snew(pdb_ch, maxch);
1551 for (i = 0; (i < natom); i++)
1553 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1555 /* TODO this should live in a helper object, and consolidate
1556 that with code in modify_chain_numbers */
1557 prev_atomname = this_atomname;
1558 prev_atomnum = this_atomnum;
1559 prev_resname = this_resname;
1560 prev_resnum = this_resnum;
1561 prev_chainid = this_chainid;
1562 prev_chainnumber = this_chainnumber;
1565 prev_chainstart = this_chainstart;
1568 this_atomname = *pdba_all.atomname[i];
1569 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1570 this_resname = *ri->name;
1571 this_resnum = ri->nr;
1572 this_chainid = ri->chainid;
1573 this_chainnumber = ri->chainnum;
1575 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1576 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1578 this_chainstart = pdba_all.atom[i].resind;
1583 if (!strncmp(merge[0], "int", 3))
1585 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1586 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1587 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1588 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1590 if (NULL == fgets(select, STRLEN-1, stdin))
1592 gmx_fatal(FARGS, "Error reading from stdin");
1594 bMerged = (select[0] == 'y');
1596 else if (!strncmp(merge[0], "all", 3))
1604 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1605 pdba_all.atom[i].resind - prev_chainstart;
1606 pdb_ch[nch-1].nterpairs++;
1607 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1612 /* set natom for previous chain */
1615 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1622 /* check if chain identifier was used before */
1623 for (j = 0; (j < nch); j++)
1625 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1627 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1628 "They will be treated as separate chains unless you reorder your file.\n",
1632 // TODO This is too convoluted. Use a std::vector
1636 srenew(pdb_ch, maxch);
1638 pdb_ch[nch].chainid = ri->chainid;
1639 pdb_ch[nch].chainnum = ri->chainnum;
1640 pdb_ch[nch].start = i;
1641 pdb_ch[nch].bAllWat = bWat;
1644 pdb_ch[nch].nterpairs = 0;
1648 pdb_ch[nch].nterpairs = 1;
1650 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1651 /* modified [nch] to [0] below */
1652 pdb_ch[nch].chainstart[0] = 0;
1658 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1660 /* set all the water blocks at the end of the chain */
1661 snew(swap_index, nch);
1663 for (i = 0; i < nch; i++)
1665 if (!pdb_ch[i].bAllWat)
1671 for (i = 0; i < nch; i++)
1673 if (pdb_ch[i].bAllWat)
1679 if (nwaterchain > 1)
1681 printf("Moved all the water blocks to the end\n");
1685 /* copy pdb data and x for all chains */
1686 for (i = 0; (i < nch); i++)
1689 chains[i].chainid = pdb_ch[si].chainid;
1690 chains[i].chainnum = pdb_ch[si].chainnum;
1691 chains[i].bAllWat = pdb_ch[si].bAllWat;
1692 chains[i].nterpairs = pdb_ch[si].nterpairs;
1693 chains[i].chainstart = pdb_ch[si].chainstart;
1694 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1695 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1696 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1697 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1699 snew(chains[i].pdba, 1);
1700 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1701 snew(chains[i].x, chains[i].pdba->nr);
1702 for (j = 0; j < chains[i].pdba->nr; j++)
1704 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1705 snew(chains[i].pdba->atomname[j], 1);
1706 *chains[i].pdba->atomname[j] =
1707 gmx_strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1708 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1709 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1711 /* Re-index the residues assuming that the indices are continuous */
1712 k = chains[i].pdba->atom[0].resind;
1713 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1714 chains[i].pdba->nres = nres;
1715 for (j = 0; j < chains[i].pdba->nr; j++)
1717 chains[i].pdba->atom[j].resind -= k;
1719 srenew(chains[i].pdba->resinfo, nres);
1720 for (j = 0; j < nres; j++)
1722 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1723 snew(chains[i].pdba->resinfo[j].name, 1);
1724 *chains[i].pdba->resinfo[j].name = gmx_strdup(*pdba_all.resinfo[k+j].name);
1725 /* make all chain identifiers equal to that of the chain */
1726 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1730 if (nchainmerges > 0)
1732 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1736 printf("There are %d chains and %d blocks of water and "
1737 "%d residues with %d atoms\n",
1738 nch-nwaterchain, nwaterchain,
1739 pdba_all.nres, natom);
1741 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1742 for (i = 0; (i < nch); i++)
1744 printf(" %d '%c' %5d %6d %s\n",
1745 i+1, chains[i].chainid ? chains[i].chainid : '-',
1746 chains[i].pdba->nres, chains[i].pdba->nr,
1747 chains[i].bAllWat ? "(only water)" : "");
1751 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1753 /* Read atomtypes... */
1754 atype = read_atype(ffdir, &symtab);
1756 /* read residue database */
1757 printf("Reading residue database... (%s)\n", forcefield);
1758 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1761 for (i = 0; i < nrtpf; i++)
1763 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1769 /* Not correct with multiple rtp input files with different bonded types */
1770 fp = gmx_fio_fopen("new.rtp", "w");
1771 print_resall(fp, nrtp, restp, atype);
1775 /* read hydrogen database */
1776 nah = read_h_db(ffdir, &ah);
1778 /* Read Termini database... */
1779 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1780 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1782 top_fn = ftp2fn(efTOP, NFILE, fnm);
1783 top_file = gmx_fio_fopen(top_fn, "w");
1785 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1791 for (chain = 0; (chain < nch); chain++)
1793 cc = &(chains[chain]);
1795 /* set pdba, natom and nres to the current chain */
1798 natom = cc->pdba->nr;
1799 nres = cc->pdba->nres;
1801 if (cc->chainid && ( cc->chainid != ' ' ) )
1803 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1804 chain+1, cc->chainid, natom, nres);
1808 printf("Processing chain %d (%d atoms, %d residues)\n",
1809 chain+1, natom, nres);
1812 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1813 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1814 nrtprename, rtprename);
1816 cc->chainstart[cc->nterpairs] = pdba->nres;
1818 for (i = 0; i < cc->nterpairs; i++)
1820 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1821 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1823 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1829 if (cc->nterpairs == 0)
1831 printf("Problem with chain definition, or missing terminal residues.\n"
1832 "This chain does not appear to contain a recognized chain molecule.\n"
1833 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1836 /* Check for disulfides and other special bonds */
1837 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1841 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1849 sprintf(fn, "chain.pdb");
1853 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1855 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1859 for (i = 0; i < cc->nterpairs; i++)
1863 * We first apply a filter so we only have the
1864 * termini that can be applied to the residue in question
1865 * (or a generic terminus if no-residue specific is available).
1867 /* First the N terminus */
1870 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1871 *pdba->resinfo[cc->r_start[i]].name,
1872 *pdba->resinfo[cc->r_start[i]].rtp,
1876 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1877 "is already in a terminus-specific form and skipping terminus selection.\n");
1882 if (bTerMan && ntdblist > 1)
1884 sprintf(select, "Select start terminus type for %s-%d",
1885 *pdba->resinfo[cc->r_start[i]].name,
1886 pdba->resinfo[cc->r_start[i]].nr);
1887 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1891 cc->ntdb[i] = tdblist[0];
1894 printf("Start terminus %s-%d: %s\n",
1895 *pdba->resinfo[cc->r_start[i]].name,
1896 pdba->resinfo[cc->r_start[i]].nr,
1897 (cc->ntdb[i])->name);
1906 /* And the C terminus */
1909 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1910 *pdba->resinfo[cc->r_end[i]].name,
1911 *pdba->resinfo[cc->r_end[i]].rtp,
1915 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1916 "is already in a terminus-specific form and skipping terminus selection.\n");
1921 if (bTerMan && ntdblist > 1)
1923 sprintf(select, "Select end terminus type for %s-%d",
1924 *pdba->resinfo[cc->r_end[i]].name,
1925 pdba->resinfo[cc->r_end[i]].nr);
1926 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1930 cc->ctdb[i] = tdblist[0];
1932 printf("End terminus %s-%d: %s\n",
1933 *pdba->resinfo[cc->r_end[i]].name,
1934 pdba->resinfo[cc->r_end[i]].nr,
1935 (cc->ctdb[i])->name);
1944 /* lookup hackblocks and rtp for all residues */
1945 get_hackblocks_rtp(&hb_chain, &restp_chain,
1946 nrtp, restp, pdba->nres, pdba->resinfo,
1947 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1948 /* ideally, now we would not need the rtp itself anymore, but do
1949 everything using the hb and restp arrays. Unfortunately, that
1950 requires some re-thinking of code in gen_vsite.c, which I won't
1951 do now :( AF 26-7-99 */
1953 rename_atoms(NULL, ffdir,
1954 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1956 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1960 block = new_blocka();
1962 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1963 remove_duplicate_atoms(pdba, x, bVerbose);
1964 if (ftp2bSet(efNDX, NFILE, fnm))
1968 fprintf(stderr, "WARNING: with the -remh option the generated "
1969 "index file (%s) might be useless\n"
1970 "(the index file is generated before hydrogens are added)",
1971 ftp2fn(efNDX, NFILE, fnm));
1973 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames, FALSE, 0);
1975 for (i = 0; i < block->nr; i++)
1984 fprintf(stderr, "WARNING: "
1985 "without sorting no check for duplicate atoms can be done\n");
1988 /* Generate Hydrogen atoms (and termini) in the sequence */
1989 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1990 natom = add_h(&pdba, &x, nah, ah,
1991 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1992 NULL, NULL, TRUE, FALSE);
1993 printf("Now there are %d residues with %d atoms\n",
1994 pdba->nres, pdba->nr);
1997 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
2002 for (i = 0; (i < natom); i++)
2004 fprintf(debug, "Res %s%d atom %d %s\n",
2005 *(pdba->resinfo[pdba->atom[i].resind].name),
2006 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
2010 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
2012 /* make up molecule name(s) */
2014 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2016 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
2022 sprintf(molname, "Water");
2026 this_chainid = cc->chainid;
2028 /* Add the chain id if we have one */
2029 if (this_chainid != ' ')
2031 sprintf(buf, "_chain_%c", this_chainid);
2032 strcat(suffix, buf);
2035 /* Check if there have been previous chains with the same id */
2037 for (k = 0; k < chain; k++)
2039 if (cc->chainid == chains[k].chainid)
2044 /* Add the number for this chain identifier if there are multiple copies */
2048 sprintf(buf, "%d", nid_used+1);
2049 strcat(suffix, buf);
2052 if (strlen(suffix) > 0)
2054 sprintf(molname, "%s%s", p_restype, suffix);
2058 strcpy(molname, p_restype);
2062 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2065 strcpy(itp_fn, top_fn);
2066 printf("Chain time...\n");
2067 c = strrchr(itp_fn, '.');
2068 sprintf(c, "_%s.itp", molname);
2069 c = strrchr(posre_fn, '.');
2070 sprintf(c, "_%s.itp", molname);
2071 if (strcmp(itp_fn, posre_fn) == 0)
2073 strcpy(buf_fn, posre_fn);
2074 c = strrchr(buf_fn, '.');
2076 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2080 srenew(incls, nincl);
2081 incls[nincl-1] = gmx_strdup(itp_fn);
2082 itp_file = gmx_fio_fopen(itp_fn, "w");
2089 srenew(mols, nmol+1);
2092 mols[nmol].name = gmx_strdup("SOL");
2093 mols[nmol].nr = pdba->nres;
2097 mols[nmol].name = gmx_strdup(molname);
2104 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2114 top_file2 = itp_file;
2118 top_file2 = top_file;
2121 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2123 restp_chain, hb_chain,
2125 bVsites, bVsiteAromatics, ffdir,
2126 mHmult, nssbonds, ssbonds,
2127 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2128 bRenumRes, bRTPresname);
2132 write_posres(posre_fn, pdba, posre_fc);
2137 gmx_fio_fclose(itp_file);
2140 /* pdba and natom have been reassigned somewhere so: */
2146 if (cc->chainid == ' ')
2148 sprintf(fn, "chain.pdb");
2152 sprintf(fn, "chain_%c.pdb", cc->chainid);
2154 cool_quote(quote, 255, NULL);
2155 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2159 if (watermodel == NULL)
2161 for (chain = 0; chain < nch; chain++)
2163 if (chains[chain].bAllWat)
2165 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2171 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2172 if (!fflib_fexist(buf_fn))
2174 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2175 buf_fn, watermodel);
2179 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2180 gmx_fio_fclose(top_file);
2182 gmx_residuetype_destroy(rt);
2184 /* now merge all chains back together */
2187 for (i = 0; (i < nch); i++)
2189 natom += chains[i].pdba->nr;
2190 nres += chains[i].pdba->nres;
2193 init_t_atoms(atoms, natom, FALSE);
2194 for (i = 0; i < atoms->nres; i++)
2196 sfree(atoms->resinfo[i].name);
2198 sfree(atoms->resinfo);
2200 snew(atoms->resinfo, nres);
2204 for (i = 0; (i < nch); i++)
2208 printf("Including chain %d in system: %d atoms %d residues\n",
2209 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2211 for (j = 0; (j < chains[i].pdba->nr); j++)
2213 atoms->atom[k] = chains[i].pdba->atom[j];
2214 atoms->atom[k].resind += l; /* l is processed nr of residues */
2215 atoms->atomname[k] = chains[i].pdba->atomname[j];
2216 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2217 copy_rvec(chains[i].x[j], x[k]);
2220 for (j = 0; (j < chains[i].pdba->nres); j++)
2222 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2225 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2233 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2234 print_sums(atoms, TRUE);
2237 fprintf(stderr, "\nWriting coordinate file...\n");
2238 clear_rvec(box_space);
2241 make_new_box(atoms->nr, x, box, box_space, FALSE);
2243 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2245 printf("\t\t--------- PLEASE NOTE ------------\n");
2246 printf("You have successfully generated a topology from: %s.\n",
2247 opt2fn("-f", NFILE, fnm));
2248 if (watermodel != NULL)
2250 printf("The %s force field and the %s water model are used.\n",
2251 ffname, watermodel);
2255 printf("The %s force field is used.\n",
2258 printf("\t\t--------- ETON ESAELP ------------\n");