1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
85 static const char *res2bb_notermini(const char *name,
86 int nrr, const rtprename_t *rr)
88 /* NOTE: This function returns the main building block name,
89 * it does not take terminal renaming into account.
94 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
99 return (i < nrr ? rr[i].main : name);
102 static const char *select_res(int nr, int resnr,
103 const char *name[], const char *expl[],
105 int nrr, const rtprename_t *rr)
109 printf("Which %s type do you want for residue %d\n", title, resnr+1);
110 for (sel = 0; (sel < nr); sel++)
112 printf("%d. %s (%s)\n",
113 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
115 printf("\nType a number:"); fflush(stdout);
117 if (scanf("%d", &sel) != 1)
119 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
125 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
130 const char *lh[easpNR] = { "ASP", "ASPH" };
131 const char *expl[easpNR] = {
132 "Not protonated (charge -1)",
133 "Protonated (charge 0)"
136 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
139 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
144 const char *lh[egluNR] = { "GLU", "GLUH" };
145 const char *expl[egluNR] = {
146 "Not protonated (charge -1)",
147 "Protonated (charge 0)"
150 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
153 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
158 const char *lh[eglnNR] = { "GLN", "QLN" };
159 const char *expl[eglnNR] = {
160 "Not protonated (charge 0)",
161 "Protonated (charge +1)"
164 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
167 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
172 const char *lh[elysNR] = { "LYSN", "LYS" };
173 const char *expl[elysNR] = {
174 "Not protonated (charge 0)",
175 "Protonated (charge +1)"
178 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
181 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
186 const char *lh[eargNR] = { "ARGN", "ARG" };
187 const char *expl[eargNR] = {
188 "Not protonated (charge 0)",
189 "Protonated (charge +1)"
192 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
195 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
197 const char *expl[ehisNR] = {
204 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
207 static void read_rtprename(const char *fname, FILE *fp,
208 int *nrtprename, rtprename_t **rtprename)
210 char line[STRLEN], buf[STRLEN];
219 while (get_a_line(fp, line, STRLEN))
222 nc = sscanf(line, "%s %s %s %s %s %s",
223 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
226 if (nc != 2 && nc != 5)
228 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
234 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
239 /* This file does not have special termini names, copy them from main */
240 strcpy(rr[n].nter, rr[n].main);
241 strcpy(rr[n].cter, rr[n].main);
242 strcpy(rr[n].bter, rr[n].main);
252 static char *search_resrename(int nrr, rtprename_t *rr,
254 gmx_bool bStart, gmx_bool bEnd,
255 gmx_bool bCompareFFRTPname)
263 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
264 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
269 /* If found in the database, rename this residue's rtp building block,
270 * otherwise keep the old name.
293 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
301 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
302 int nrr, rtprename_t *rr, t_symtab *symtab,
306 gmx_bool bStart, bEnd;
308 gmx_bool bFFRTPTERRNM;
310 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
312 for (r = 0; r < pdba->nres; r++)
316 for (j = 0; j < nterpairs; j++)
323 for (j = 0; j < nterpairs; j++)
331 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
333 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
335 /* This is a terminal residue, but the residue name,
336 * currently stored in .rtp, is not a standard residue name,
337 * but probably a force field specific rtp name.
338 * Check if we need to rename it because it is terminal.
340 nn = search_resrename(nrr, rr,
341 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
344 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
348 printf("Changing rtp entry of residue %d %s to '%s'\n",
349 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
351 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
356 static void pdbres_to_gmxrtp(t_atoms *pdba)
360 for (i = 0; (i < pdba->nres); i++)
362 if (pdba->resinfo[i].rtp == NULL)
364 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
369 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
370 gmx_bool bFullCompare, t_symtab *symtab)
375 for (i = 0; (i < pdba->nres); i++)
377 resnm = *pdba->resinfo[i].name;
378 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
379 (!bFullCompare && strstr(resnm, oldnm) != NULL))
381 /* Rename the residue name (not the rtp name) */
382 pdba->resinfo[i].name = put_symtab(symtab, newnm);
387 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
388 gmx_bool bFullCompare, t_symtab *symtab)
393 for (i = 0; (i < pdba->nres); i++)
395 /* We have not set the rtp name yes, use the residue name */
396 bbnm = *pdba->resinfo[i].name;
397 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
398 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
400 /* Change the rtp builing block name */
401 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
406 static void rename_bbint(t_atoms *pdba, const char *oldnm,
407 const char *gettp(int, int, const rtprename_t *),
408 gmx_bool bFullCompare,
410 int nrr, const rtprename_t *rr)
416 for (i = 0; i < pdba->nres; i++)
418 /* We have not set the rtp name yes, use the residue name */
419 bbnm = *pdba->resinfo[i].name;
420 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
421 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
423 ptr = gettp(i, nrr, rr);
424 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
429 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
435 ftp = fn2ftp(filename);
436 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
438 fprintf(stderr, "No occupancies in %s\n", filename);
442 for (i = 0; (i < atoms->nr); i++)
444 if (atoms->pdbinfo[i].occup != 1)
448 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
449 *atoms->resinfo[atoms->atom[i].resind].name,
450 atoms->resinfo[ atoms->atom[i].resind].nr,
452 atoms->pdbinfo[i].occup);
454 if (atoms->pdbinfo[i].occup == 0)
464 if (nzero == atoms->nr)
466 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
468 else if ((nzero > 0) || (nnotone > 0))
472 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
473 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
475 nzero, nnotone, atoms->nr);
479 fprintf(stderr, "All occupancies are one\n");
484 void write_posres(char *fn, t_atoms *pdba, real fc)
489 fp = gmx_fio_fopen(fn, "w");
491 "; In this topology include file, you will find position restraint\n"
492 "; entries for all the heavy atoms in your original pdb file.\n"
493 "; This means that all the protons which were added by pdb2gmx are\n"
494 "; not restrained.\n"
496 "[ position_restraints ]\n"
497 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
499 for (i = 0; (i < pdba->nr); i++)
501 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
503 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
509 static int read_pdball(const char *inf, const char *outf, char *title,
510 t_atoms *atoms, rvec **x,
511 int *ePBC, matrix box, gmx_bool bRemoveH,
512 t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
513 gmx_atomprop_t aps, gmx_bool bVerbose)
514 /* Read a pdb file. (containing proteins) */
516 int natom, new_natom, i;
519 printf("Reading %s...\n", inf);
520 get_stx_coordnum(inf, &natom);
521 init_t_atoms(atoms, natom, TRUE);
523 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
524 if (fn2ftp(inf) == efPDB)
526 get_pdb_atomnumber(atoms, aps);
531 for (i = 0; i < atoms->nr; i++)
533 if (!is_hydrogen(*atoms->atomname[i]))
535 atoms->atom[new_natom] = atoms->atom[i];
536 atoms->atomname[new_natom] = atoms->atomname[i];
537 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
538 copy_rvec((*x)[i], (*x)[new_natom]);
542 atoms->nr = new_natom;
547 if (title && title[0])
549 printf(" '%s',", title);
551 printf(" %d atoms\n", natom);
553 /* Rename residues */
554 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
555 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
556 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
558 rename_atoms("xlateat.dat", NULL,
559 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
568 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
574 void process_chain(t_atoms *pdba, rvec *x,
575 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
576 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
577 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
578 real angle, real distance, t_symtab *symtab,
579 int nrr, const rtprename_t *rr)
581 /* Rename aromatics, lys, asp and histidine */
584 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
588 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
592 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
596 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
600 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
604 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
608 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
612 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
616 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
620 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
625 set_histp(pdba, x, angle, distance);
629 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
632 /* Initialize the rtp builing block names with the residue names
633 * for the residues that have not been processed above.
635 pdbres_to_gmxrtp(pdba);
637 /* Now we have all rtp names set.
638 * The rtp names will conform to Gromacs naming,
639 * unless the input pdb file contained one or more force field specific
640 * rtp names as residue names.
644 /* struct for sorting the atoms from the pdb file */
646 int resnr; /* residue number */
647 int j; /* database order index */
648 int index; /* original atom number */
649 char anm1; /* second letter of atom name */
650 char altloc; /* alternate location indicator */
653 int pdbicomp(const void *a, const void *b)
658 pa = (t_pdbindex *)a;
659 pb = (t_pdbindex *)b;
661 d = (pa->resnr - pb->resnr);
667 d = (pa->anm1 - pb->anm1);
670 d = (pa->altloc - pb->altloc);
678 static void sort_pdbatoms(int nrtp, t_restp restp[], t_hackblock hb[],
679 int natoms, t_atoms **pdbaptr, rvec **x,
680 t_blocka *block, char ***gnames)
682 t_atoms *pdba, *pdbnew;
697 for (i = 0; i < natoms; i++)
699 atomnm = *pdba->atomname[i];
700 rptr = &restp[pdba->atom[i].resind];
701 for (j = 0; (j < rptr->natom); j++)
703 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
708 if (j == rptr->natom)
713 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
714 "while sorting atoms.\n%s", atomnm,
715 *pdba->resinfo[pdba->atom[i].resind].name,
716 pdba->resinfo[pdba->atom[i].resind].nr,
719 is_hydrogen(atomnm) ?
720 "\nFor a hydrogen, this can be a different protonation state, or it\n"
721 "might have had a different number in the PDB file and was rebuilt\n"
722 "(it might for instance have been H3, and we only expected H1 & H2).\n"
723 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
724 "Remove this hydrogen or choose a different protonation state to solve it.\n"
725 "Option -ignh will ignore all hydrogens in the input." : ".");
726 gmx_fatal(FARGS, buf);
728 /* make shadow array to be sorted into indexgroup */
729 pdbi[i].resnr = pdba->atom[i].resind;
732 pdbi[i].anm1 = atomnm[1];
733 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
735 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
737 /* pdba is sorted in pdbnew using the pdbi index */
740 init_t_atoms(pdbnew, natoms, TRUE);
742 pdbnew->nr = pdba->nr;
743 pdbnew->nres = pdba->nres;
744 sfree(pdbnew->resinfo);
745 pdbnew->resinfo = pdba->resinfo;
746 for (i = 0; i < natoms; i++)
748 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
749 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
750 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
751 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
752 /* make indexgroup in block */
753 a[i] = pdbi[i].index;
756 sfree(pdba->atomname);
758 sfree(pdba->pdbinfo);
761 /* copy the sorted pdbnew back to pdba */
764 add_grp(block, gnames, natoms, a, "prot_sort");
770 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
772 int i, j, oldnatoms, ndel;
775 printf("Checking for duplicate atoms....\n");
776 oldnatoms = pdba->nr;
778 /* NOTE: pdba->nr is modified inside the loop */
779 for (i = 1; (i < pdba->nr); i++)
781 /* compare 'i' and 'i-1', throw away 'i' if they are identical
782 this is a 'while' because multiple alternate locations can be present */
783 while ( (i < pdba->nr) &&
784 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
785 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
790 ri = &pdba->resinfo[pdba->atom[i].resind];
791 printf("deleting duplicate atom %4s %s%4d%c",
792 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
793 if (ri->chainid && (ri->chainid != ' '))
795 printf(" ch %c", ri->chainid);
799 if (pdba->pdbinfo[i].atomnr)
801 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
803 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
805 printf(" altloc %c", pdba->pdbinfo[i].altloc);
811 /* We can not free, since it might be in the symtab */
812 /* sfree(pdba->atomname[i]); */
813 for (j = i; j < pdba->nr; j++)
815 pdba->atom[j] = pdba->atom[j+1];
816 pdba->atomname[j] = pdba->atomname[j+1];
817 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
818 copy_rvec(x[j+1], x[j]);
820 srenew(pdba->atom, pdba->nr);
821 /* srenew(pdba->atomname, pdba->nr); */
822 srenew(pdba->pdbinfo, pdba->nr);
825 if (pdba->nr != oldnatoms)
827 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
833 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
836 const char *p_startrestype;
837 const char *p_restype;
838 int nstartwarn, nendwarn;
846 /* Find the starting terminus (typially N or 5') */
847 for (i = r0; i < r1 && *r_start == -1; i++)
849 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
850 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
852 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
859 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
863 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
871 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
872 for (i = *r_start; i < r1; i++)
874 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
875 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
883 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
884 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
885 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
889 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
898 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
904 modify_chain_numbers(t_atoms * pdba,
905 const char * chainsep)
908 char old_prev_chainid;
909 char old_this_chainid;
910 int old_prev_chainnum;
911 int old_this_chainnum;
917 const char * prev_atomname;
918 const char * this_atomname;
919 const char * prev_resname;
920 const char * this_resname;
925 int prev_chainnumber;
926 int this_chainnumber;
938 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
940 /* Be a bit flexible to catch typos */
941 if (!strncmp(chainsep, "id_o", 4))
943 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
944 splitting = SPLIT_ID_OR_TER;
945 printf("Splitting chemical chains based on TER records or chain id changing.\n");
947 else if (!strncmp(chainsep, "int", 3))
949 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
950 splitting = SPLIT_INTERACTIVE;
951 printf("Splitting chemical chains interactively.\n");
953 else if (!strncmp(chainsep, "id_a", 4))
955 splitting = SPLIT_ID_AND_TER;
956 printf("Splitting chemical chains based on TER records and chain id changing.\n");
958 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
960 splitting = SPLIT_ID_ONLY;
961 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
963 else if (chainsep[0] == 't')
965 splitting = SPLIT_TER_ONLY;
966 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
970 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
973 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
975 old_prev_chainid = '?';
976 old_prev_chainnum = -1;
979 this_atomname = NULL;
984 this_chainnumber = -1;
986 for (i = 0; i < pdba->nres; i++)
988 ri = &pdba->resinfo[i];
989 old_this_chainid = ri->chainid;
990 old_this_chainnum = ri->chainnum;
992 prev_atomname = this_atomname;
993 prev_atomnum = this_atomnum;
994 prev_resname = this_resname;
995 prev_resnum = this_resnum;
996 prev_chainid = this_chainid;
997 prev_chainnumber = this_chainnumber;
999 this_atomname = *(pdba->atomname[i]);
1000 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1001 this_resname = *ri->name;
1002 this_resnum = ri->nr;
1003 this_chainid = ri->chainid;
1004 this_chainnumber = ri->chainnum;
1008 case SPLIT_ID_OR_TER:
1009 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1015 case SPLIT_ID_AND_TER:
1016 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1023 if (old_this_chainid != old_prev_chainid)
1029 case SPLIT_TER_ONLY:
1030 if (old_this_chainnum != old_prev_chainnum)
1035 case SPLIT_INTERACTIVE:
1036 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1040 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1041 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1042 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1043 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1045 if (NULL == fgets(select, STRLEN-1, stdin))
1047 gmx_fatal(FARGS, "Error reading from stdin");
1050 if (i == 0 || select[0] == 'y')
1057 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1060 old_prev_chainid = old_this_chainid;
1061 old_prev_chainnum = old_this_chainnum;
1063 ri->chainnum = new_chainnum;
1092 int cmain(int argc, char *argv[])
1094 const char *desc[] = {
1095 "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1096 "some database files, adds hydrogens to the molecules and generates",
1097 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1098 "and a topology in GROMACS format.",
1099 "These files can subsequently be processed to generate a run input file.",
1101 "[TT]pdb2gmx[tt] will search for force fields by looking for",
1102 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1103 "of the current working directory and of the GROMACS library directory",
1104 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1106 "By default the forcefield selection is interactive,",
1107 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1108 "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
1109 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1111 "After choosing a force field, all files will be read only from",
1112 "the corresponding force field directory.",
1113 "If you want to modify or add a residue types, you can copy the force",
1114 "field directory from the GROMACS library directory to your current",
1115 "working directory. If you want to add new protein residue types,",
1116 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1117 "or copy the whole library directory to a local directory and set",
1118 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1119 "Check Chapter 5 of the manual for more information about file formats.",
1122 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1123 "need not necessarily contain a protein structure. Every kind of",
1124 "molecule for which there is support in the database can be converted.",
1125 "If there is no support in the database, you can add it yourself.[PAR]",
1127 "The program has limited intelligence, it reads a number of database",
1128 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1129 "if necessary this can be done manually. The program can prompt the",
1130 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1131 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1132 "protonated (three protons, default), for Asp and Glu unprotonated",
1133 "(default) or protonated, for His the proton can be either on ND1,",
1134 "on NE2 or on both. By default these selections are done automatically.",
1135 "For His, this is based on an optimal hydrogen bonding",
1136 "conformation. Hydrogen bonds are defined based on a simple geometric",
1137 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1138 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1139 "[TT]-dist[tt] respectively.[PAR]",
1141 "The protonation state of N- and C-termini can be chosen interactively",
1142 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1143 "respectively. Some force fields support zwitterionic forms for chains of",
1144 "one residue, but for polypeptides these options should NOT be selected.",
1145 "The AMBER force fields have unique forms for the terminal residues,",
1146 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1147 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1148 "respectively to use these forms, making sure you preserve the format",
1149 "of the coordinate file. Alternatively, use named terminating residues",
1150 "(e.g. ACE, NME).[PAR]",
1152 "The separation of chains is not entirely trivial since the markup",
1153 "in user-generated PDB files frequently varies and sometimes it",
1154 "is desirable to merge entries across a TER record, for instance",
1155 "if you want a disulfide bridge or distance restraints between",
1156 "two protein chains or if you have a HEME group bound to a protein.",
1157 "In such cases multiple chains should be contained in a single",
1158 "[TT]moleculetype[tt] definition.",
1159 "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
1160 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1161 "start, and termini added when applicable. This can be done based on the",
1162 "existence of TER records, when the chain id changes, or combinations of either",
1163 "or both of these. You can also do the selection fully interactively.",
1164 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1165 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1166 "This can be turned off (no merging), all non-water chains can be merged into a",
1167 "single molecule, or the selection can be done interactively.[PAR]",
1169 "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
1170 "If any of the occupancies are not one, indicating that the atom is",
1171 "not resolved well in the structure, a warning message is issued.",
1172 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1173 "all occupancy fields may be zero. Either way, it is up to the user",
1174 "to verify the correctness of the input data (read the article!).[PAR]",
1176 "During processing the atoms will be reordered according to GROMACS",
1177 "conventions. With [TT]-n[tt] an index file can be generated that",
1178 "contains one group reordered in the same way. This allows you to",
1179 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1180 "one limitation: reordering is done after the hydrogens are stripped",
1181 "from the input and before new hydrogens are added. This means that",
1182 "you should not use [TT]-ignh[tt].[PAR]",
1184 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1185 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1186 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1189 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1190 "motions. Angular and out-of-plane motions can be removed by changing",
1191 "hydrogens into virtual sites and fixing angles, which fixes their",
1192 "position relative to neighboring atoms. Additionally, all atoms in the",
1193 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1194 "can be converted into virtual sites, eliminating the fast improper dihedral",
1195 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1196 "atoms are also converted to virtual sites. The mass of all atoms that are",
1197 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1198 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1199 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1200 "done for water hydrogens to slow down the rotational motion of water.",
1201 "The increase in mass of the hydrogens is subtracted from the bonded",
1202 "(heavy) atom so that the total mass of the system remains the same."
1206 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1208 t_atoms pdba_all, *pdba;
1212 int chain, nch, maxch, nwaterchain;
1214 t_chain *chains, *cc;
1215 char select[STRLEN];
1223 int i, j, k, l, nrtp;
1224 int *swap_index, si;
1228 gpp_atomtype_t atype;
1229 gmx_residuetype_t rt;
1231 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1232 char molname[STRLEN], title[STRLEN], quote[STRLEN], generator[STRLEN];
1233 char *c, forcefield[STRLEN], ffdir[STRLEN];
1234 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1242 int nrtprename, naa;
1243 rtprename_t *rtprename = NULL;
1244 int nah, nNtdb, nCtdb, ntdblist;
1245 t_hackblock *ntdb, *ctdb, **tdblist;
1249 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1251 t_hackblock *hb_chain;
1252 t_restp *restp_chain;
1254 const char *p_restype;
1258 const char * prev_atomname;
1259 const char * this_atomname;
1260 const char * prev_resname;
1261 const char * this_resname;
1266 int prev_chainnumber;
1267 int this_chainnumber;
1269 int this_chainstart;
1270 int prev_chainstart;
1277 { efSTX, "-f", "eiwit.pdb", ffREAD },
1278 { efSTO, "-o", "conf", ffWRITE },
1279 { efTOP, NULL, NULL, ffWRITE },
1280 { efITP, "-i", "posre", ffWRITE },
1281 { efNDX, "-n", "clean", ffOPTWR },
1282 { efSTO, "-q", "clean.pdb", ffOPTWR }
1284 #define NFILE asize(fnm)
1287 /* Command line arguments must be static */
1288 static gmx_bool bNewRTP = FALSE;
1289 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1290 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1291 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1292 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1293 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1294 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1295 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1296 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1297 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1298 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1299 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1300 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1301 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1302 static const char *ff = "select";
1305 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1306 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1307 { "-lb", FALSE, etREAL, {&long_bond_dist},
1308 "HIDDENLong bond warning distance" },
1309 { "-sb", FALSE, etREAL, {&short_bond_dist},
1310 "HIDDENShort bond warning distance" },
1311 { "-chainsep", FALSE, etENUM, {chainsep},
1312 "Condition in PDB files when a new chain should be started (adding termini)" },
1313 { "-merge", FALSE, etENUM, {&merge},
1314 "Merge multiple chains into a single [moleculetype]" },
1315 { "-ff", FALSE, etSTR, {&ff},
1316 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1317 { "-water", FALSE, etENUM, {watstr},
1318 "Water model to use" },
1319 { "-inter", FALSE, etBOOL, {&bInter},
1320 "Set the next 8 options to interactive"},
1321 { "-ss", FALSE, etBOOL, {&bCysMan},
1322 "Interactive SS bridge selection" },
1323 { "-ter", FALSE, etBOOL, {&bTerMan},
1324 "Interactive termini selection, instead of charged (default)" },
1325 { "-lys", FALSE, etBOOL, {&bLysMan},
1326 "Interactive lysine selection, instead of charged" },
1327 { "-arg", FALSE, etBOOL, {&bArgMan},
1328 "Interactive arginine selection, instead of charged" },
1329 { "-asp", FALSE, etBOOL, {&bAspMan},
1330 "Interactive aspartic acid selection, instead of charged" },
1331 { "-glu", FALSE, etBOOL, {&bGluMan},
1332 "Interactive glutamic acid selection, instead of charged" },
1333 { "-gln", FALSE, etBOOL, {&bGlnMan},
1334 "Interactive glutamine selection, instead of neutral" },
1335 { "-his", FALSE, etBOOL, {&bHisMan},
1336 "Interactive histidine selection, instead of checking H-bonds" },
1337 { "-angle", FALSE, etREAL, {&angle},
1338 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1339 { "-dist", FALSE, etREAL, {&distance},
1340 "Maximum donor-acceptor distance for a H-bond (nm)" },
1341 { "-una", FALSE, etBOOL, {&bUnA},
1342 "Select aromatic rings with united CH atoms on phenylalanine, "
1343 "tryptophane and tyrosine" },
1344 { "-sort", FALSE, etBOOL, {&bSort},
1345 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1346 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1347 "Ignore hydrogen atoms that are in the coordinate file" },
1348 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1349 "Continue when atoms are missing, dangerous" },
1350 { "-v", FALSE, etBOOL, {&bVerbose},
1351 "Be slightly more verbose in messages" },
1352 { "-posrefc", FALSE, etREAL, {&posre_fc},
1353 "Force constant for position restraints" },
1354 { "-vsite", FALSE, etENUM, {vsitestr},
1355 "Convert atoms to virtual sites" },
1356 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1357 "Make hydrogen atoms heavy" },
1358 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1359 "Change the mass of hydrogens to 2 amu" },
1360 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1361 "Use charge groups in the [TT].rtp[tt] file" },
1362 { "-cmap", TRUE, etBOOL, {&bCmap},
1363 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1364 { "-renum", TRUE, etBOOL, {&bRenumRes},
1365 "Renumber the residues consecutively in the output" },
1366 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1367 "Use [TT].rtp[tt] entry names as residue names" }
1369 #define NPARGS asize(pa)
1371 CopyRight(stderr, argv[0]);
1372 parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1375 /* Force field selection, interactive or direct */
1376 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1377 forcefield, sizeof(forcefield),
1378 ffdir, sizeof(ffdir));
1380 if (strlen(forcefield) > 0)
1382 strcpy(ffname, forcefield);
1383 ffname[0] = toupper(ffname[0]);
1387 gmx_fatal(FARGS, "Empty forcefield string");
1390 printf("\nUsing the %s force field in directory %s\n\n",
1393 choose_watermodel(watstr[0], ffdir, &watermodel);
1397 /* if anything changes here, also change description of -inter */
1412 else if (bDeuterate)
1421 switch (vsitestr[0][0])
1423 case 'n': /* none */
1425 bVsiteAromatics = FALSE;
1427 case 'h': /* hydrogens */
1429 bVsiteAromatics = FALSE;
1431 case 'a': /* aromatics */
1433 bVsiteAromatics = TRUE;
1436 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1437 __FILE__, __LINE__, vsitestr[0]);
1440 /* Open the symbol table */
1441 open_symtab(&symtab);
1443 /* Residue type database */
1444 gmx_residuetype_init(&rt);
1446 /* Read residue renaming database(s), if present */
1447 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1451 for (i = 0; i < nrrn; i++)
1453 fp = fflib_open(rrn[i]);
1454 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1460 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1462 for (i = 0; i < nrtprename; i++)
1464 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1466 /* Only add names if the 'standard' gromacs/iupac base name was found */
1469 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1470 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1471 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1472 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1477 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1478 strstr(watermodel, "4P")))
1482 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1483 strstr(watermodel, "5P")))
1492 aps = gmx_atomprop_init();
1493 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1494 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1499 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1502 printf("Analyzing pdb file\n");
1507 modify_chain_numbers(&pdba_all, chainsep[0]);
1511 this_atomname = NULL;
1513 this_resname = NULL;
1516 this_chainnumber = -1;
1517 this_chainstart = 0;
1518 /* Keep the compiler happy */
1519 prev_chainstart = 0;
1524 for (i = 0; (i < natom); i++)
1526 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1528 prev_atomname = this_atomname;
1529 prev_atomnum = this_atomnum;
1530 prev_resname = this_resname;
1531 prev_resnum = this_resnum;
1532 prev_chainid = this_chainid;
1533 prev_chainnumber = this_chainnumber;
1536 prev_chainstart = this_chainstart;
1539 this_atomname = *pdba_all.atomname[i];
1540 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1541 this_resname = *ri->name;
1542 this_resnum = ri->nr;
1543 this_chainid = ri->chainid;
1544 this_chainnumber = ri->chainnum;
1546 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1547 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1549 this_chainstart = pdba_all.atom[i].resind;
1554 if (!strncmp(merge[0], "int", 3))
1556 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1557 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1558 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1559 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1561 if (NULL == fgets(select, STRLEN-1, stdin))
1563 gmx_fatal(FARGS, "Error reading from stdin");
1565 bMerged = (select[0] == 'y');
1567 else if (!strncmp(merge[0], "all", 3))
1575 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1576 pdba_all.atom[i].resind - prev_chainstart;
1577 pdb_ch[nch-1].nterpairs++;
1578 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1583 /* set natom for previous chain */
1586 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1593 /* check if chain identifier was used before */
1594 for (j = 0; (j < nch); j++)
1596 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1598 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1599 "They will be treated as separate chains unless you reorder your file.\n",
1606 srenew(pdb_ch, maxch);
1608 pdb_ch[nch].chainid = ri->chainid;
1609 pdb_ch[nch].chainnum = ri->chainnum;
1610 pdb_ch[nch].start = i;
1611 pdb_ch[nch].bAllWat = bWat;
1614 pdb_ch[nch].nterpairs = 0;
1618 pdb_ch[nch].nterpairs = 1;
1620 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1621 /* modified [nch] to [0] below */
1622 pdb_ch[nch].chainstart[0] = 0;
1628 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1630 /* set all the water blocks at the end of the chain */
1631 snew(swap_index, nch);
1633 for (i = 0; i < nch; i++)
1635 if (!pdb_ch[i].bAllWat)
1641 for (i = 0; i < nch; i++)
1643 if (pdb_ch[i].bAllWat)
1649 if (nwaterchain > 1)
1651 printf("Moved all the water blocks to the end\n");
1655 /* copy pdb data and x for all chains */
1656 for (i = 0; (i < nch); i++)
1659 chains[i].chainid = pdb_ch[si].chainid;
1660 chains[i].chainnum = pdb_ch[si].chainnum;
1661 chains[i].bAllWat = pdb_ch[si].bAllWat;
1662 chains[i].nterpairs = pdb_ch[si].nterpairs;
1663 chains[i].chainstart = pdb_ch[si].chainstart;
1664 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1665 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1666 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1667 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1669 snew(chains[i].pdba, 1);
1670 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1671 snew(chains[i].x, chains[i].pdba->nr);
1672 for (j = 0; j < chains[i].pdba->nr; j++)
1674 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1675 snew(chains[i].pdba->atomname[j], 1);
1676 *chains[i].pdba->atomname[j] =
1677 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1678 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1679 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1681 /* Re-index the residues assuming that the indices are continuous */
1682 k = chains[i].pdba->atom[0].resind;
1683 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1684 chains[i].pdba->nres = nres;
1685 for (j = 0; j < chains[i].pdba->nr; j++)
1687 chains[i].pdba->atom[j].resind -= k;
1689 srenew(chains[i].pdba->resinfo, nres);
1690 for (j = 0; j < nres; j++)
1692 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1693 snew(chains[i].pdba->resinfo[j].name, 1);
1694 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1695 /* make all chain identifiers equal to that of the chain */
1696 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1700 if (nchainmerges > 0)
1702 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1706 printf("There are %d chains and %d blocks of water and "
1707 "%d residues with %d atoms\n",
1708 nch-nwaterchain, nwaterchain,
1709 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr, natom);
1711 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1712 for (i = 0; (i < nch); i++)
1714 printf(" %d '%c' %5d %6d %s\n",
1715 i+1, chains[i].chainid ? chains[i].chainid : '-',
1716 chains[i].pdba->nres, chains[i].pdba->nr,
1717 chains[i].bAllWat ? "(only water)" : "");
1721 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1723 /* Read atomtypes... */
1724 atype = read_atype(ffdir, &symtab);
1726 /* read residue database */
1727 printf("Reading residue database... (%s)\n", forcefield);
1728 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1731 for (i = 0; i < nrtpf; i++)
1733 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1739 /* Not correct with multiple rtp input files with different bonded types */
1740 fp = gmx_fio_fopen("new.rtp", "w");
1741 print_resall(fp, nrtp, restp, atype);
1745 /* read hydrogen database */
1746 nah = read_h_db(ffdir, &ah);
1748 /* Read Termini database... */
1749 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1750 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1752 top_fn = ftp2fn(efTOP, NFILE, fnm);
1753 top_file = gmx_fio_fopen(top_fn, "w");
1755 sprintf(generator, "%s - %s", ShortProgram(), GromacsVersion() );
1757 print_top_header(top_file, top_fn, generator, FALSE, ffdir, mHmult);
1764 for (chain = 0; (chain < nch); chain++)
1766 cc = &(chains[chain]);
1768 /* set pdba, natom and nres to the current chain */
1771 natom = cc->pdba->nr;
1772 nres = cc->pdba->nres;
1774 if (cc->chainid && ( cc->chainid != ' ' ) )
1776 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1777 chain+1, cc->chainid, natom, nres);
1781 printf("Processing chain %d (%d atoms, %d residues)\n",
1782 chain+1, natom, nres);
1785 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1786 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1787 nrtprename, rtprename);
1789 cc->chainstart[cc->nterpairs] = pdba->nres;
1791 for (i = 0; i < cc->nterpairs; i++)
1793 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1794 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1796 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1802 if (cc->nterpairs == 0)
1804 printf("Problem with chain definition, or missing terminal residues.\n"
1805 "This chain does not appear to contain a recognized chain molecule.\n"
1806 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1809 /* Check for disulfides and other special bonds */
1810 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1814 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1822 sprintf(fn, "chain.pdb");
1826 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1828 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1832 for (i = 0; i < cc->nterpairs; i++)
1836 * We first apply a filter so we only have the
1837 * termini that can be applied to the residue in question
1838 * (or a generic terminus if no-residue specific is available).
1840 /* First the N terminus */
1843 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1844 *pdba->resinfo[cc->r_start[i]].name,
1845 *pdba->resinfo[cc->r_start[i]].rtp,
1849 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1850 "is already in a terminus-specific form and skipping terminus selection.\n");
1855 if (bTerMan && ntdblist > 1)
1857 sprintf(select, "Select start terminus type for %s-%d",
1858 *pdba->resinfo[cc->r_start[i]].name,
1859 pdba->resinfo[cc->r_start[i]].nr);
1860 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1864 cc->ntdb[i] = tdblist[0];
1867 printf("Start terminus %s-%d: %s\n",
1868 *pdba->resinfo[cc->r_start[i]].name,
1869 pdba->resinfo[cc->r_start[i]].nr,
1870 (cc->ntdb[i])->name);
1879 /* And the C terminus */
1882 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1883 *pdba->resinfo[cc->r_end[i]].name,
1884 *pdba->resinfo[cc->r_end[i]].rtp,
1888 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1889 "is already in a terminus-specific form and skipping terminus selection.\n");
1894 if (bTerMan && ntdblist > 1)
1896 sprintf(select, "Select end terminus type for %s-%d",
1897 *pdba->resinfo[cc->r_end[i]].name,
1898 pdba->resinfo[cc->r_end[i]].nr);
1899 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1903 cc->ctdb[i] = tdblist[0];
1905 printf("End terminus %s-%d: %s\n",
1906 *pdba->resinfo[cc->r_end[i]].name,
1907 pdba->resinfo[cc->r_end[i]].nr,
1908 (cc->ctdb[i])->name);
1917 /* lookup hackblocks and rtp for all residues */
1918 get_hackblocks_rtp(&hb_chain, &restp_chain,
1919 nrtp, restp, pdba->nres, pdba->resinfo,
1920 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1921 /* ideally, now we would not need the rtp itself anymore, but do
1922 everything using the hb and restp arrays. Unfortunately, that
1923 requires some re-thinking of code in gen_vsite.c, which I won't
1924 do now :( AF 26-7-99 */
1926 rename_atoms(NULL, ffdir,
1927 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1929 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1933 block = new_blocka();
1935 sort_pdbatoms(pdba->nres, restp_chain, hb_chain,
1936 natom, &pdba, &x, block, &gnames);
1937 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1938 if (ftp2bSet(efNDX, NFILE, fnm))
1942 fprintf(stderr, "WARNING: with the -remh option the generated "
1943 "index file (%s) might be useless\n"
1944 "(the index file is generated before hydrogens are added)",
1945 ftp2fn(efNDX, NFILE, fnm));
1947 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames);
1949 for (i = 0; i < block->nr; i++)
1958 fprintf(stderr, "WARNING: "
1959 "without sorting no check for duplicate atoms can be done\n");
1962 /* Generate Hydrogen atoms (and termini) in the sequence */
1963 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1964 natom = add_h(&pdba, &x, nah, ah,
1965 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1966 NULL, NULL, TRUE, FALSE);
1967 printf("Now there are %d residues with %d atoms\n",
1968 pdba->nres, pdba->nr);
1971 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1976 for (i = 0; (i < natom); i++)
1978 fprintf(debug, "Res %s%d atom %d %s\n",
1979 *(pdba->resinfo[pdba->atom[i].resind].name),
1980 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1984 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1986 /* make up molecule name(s) */
1988 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1990 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
1996 sprintf(molname, "Water");
2000 this_chainid = cc->chainid;
2002 /* Add the chain id if we have one */
2003 if (this_chainid != ' ')
2005 sprintf(buf, "_chain_%c", this_chainid);
2006 strcat(suffix, buf);
2009 /* Check if there have been previous chains with the same id */
2011 for (k = 0; k < chain; k++)
2013 if (cc->chainid == chains[k].chainid)
2018 /* Add the number for this chain identifier if there are multiple copies */
2022 sprintf(buf, "%d", nid_used+1);
2023 strcat(suffix, buf);
2026 if (strlen(suffix) > 0)
2028 sprintf(molname, "%s%s", p_restype, suffix);
2032 strcpy(molname, p_restype);
2036 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2039 strcpy(itp_fn, top_fn);
2040 printf("Chain time...\n");
2041 c = strrchr(itp_fn, '.');
2042 sprintf(c, "_%s.itp", molname);
2043 c = strrchr(posre_fn, '.');
2044 sprintf(c, "_%s.itp", molname);
2045 if (strcmp(itp_fn, posre_fn) == 0)
2047 strcpy(buf_fn, posre_fn);
2048 c = strrchr(buf_fn, '.');
2050 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2054 srenew(incls, nincl);
2055 incls[nincl-1] = strdup(itp_fn);
2056 itp_file = gmx_fio_fopen(itp_fn, "w");
2063 srenew(mols, nmol+1);
2066 mols[nmol].name = strdup("SOL");
2067 mols[nmol].nr = pdba->nres;
2071 mols[nmol].name = strdup(molname);
2078 print_top_comment(itp_file, itp_fn, generator, ffdir, TRUE);
2088 top_file2 = itp_file;
2092 top_file2 = top_file;
2095 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2097 restp_chain, hb_chain,
2098 cc->nterpairs, cc->ntdb, cc->ctdb, bAllowMissing,
2099 bVsites, bVsiteAromatics, forcefield, ffdir,
2100 mHmult, nssbonds, ssbonds,
2101 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2102 bRenumRes, bRTPresname);
2106 write_posres(posre_fn, pdba, posre_fc);
2111 gmx_fio_fclose(itp_file);
2114 /* pdba and natom have been reassigned somewhere so: */
2120 if (cc->chainid == ' ')
2122 sprintf(fn, "chain.pdb");
2126 sprintf(fn, "chain_%c.pdb", cc->chainid);
2128 cool_quote(quote, 255, NULL);
2129 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2133 if (watermodel == NULL)
2135 for (chain = 0; chain < nch; chain++)
2137 if (chains[chain].bAllWat)
2139 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2145 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2146 if (!fflib_fexist(buf_fn))
2148 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2149 buf_fn, watermodel);
2153 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2154 gmx_fio_fclose(top_file);
2156 gmx_residuetype_destroy(rt);
2158 /* now merge all chains back together */
2161 for (i = 0; (i < nch); i++)
2163 natom += chains[i].pdba->nr;
2164 nres += chains[i].pdba->nres;
2167 init_t_atoms(atoms, natom, FALSE);
2168 for (i = 0; i < atoms->nres; i++)
2170 sfree(atoms->resinfo[i].name);
2172 sfree(atoms->resinfo);
2174 snew(atoms->resinfo, nres);
2178 for (i = 0; (i < nch); i++)
2182 printf("Including chain %d in system: %d atoms %d residues\n",
2183 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2185 for (j = 0; (j < chains[i].pdba->nr); j++)
2187 atoms->atom[k] = chains[i].pdba->atom[j];
2188 atoms->atom[k].resind += l; /* l is processed nr of residues */
2189 atoms->atomname[k] = chains[i].pdba->atomname[j];
2190 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2191 copy_rvec(chains[i].x[j], x[k]);
2194 for (j = 0; (j < chains[i].pdba->nres); j++)
2196 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2199 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2207 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2208 print_sums(atoms, TRUE);
2211 fprintf(stderr, "\nWriting coordinate file...\n");
2212 clear_rvec(box_space);
2215 gen_box(0, atoms->nr, x, box, box_space, FALSE);
2217 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2219 printf("\t\t--------- PLEASE NOTE ------------\n");
2220 printf("You have successfully generated a topology from: %s.\n",
2221 opt2fn("-f", NFILE, fnm));
2222 if (watermodel != NULL)
2224 printf("The %s force field and the %s water model are used.\n",
2225 ffname, watermodel);
2229 printf("The %s force field is used.\n",
2232 printf("\t\t--------- ETON ESAELP ------------\n");