1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
44 #include "gromacs/fileio/gmxfio.h"
48 #include "gromacs/fileio/confio.h"
52 #include "gromacs/fileio/futil.h"
53 #include "gmx_fatal.h"
54 #include "gromacs/fileio/pdbio.h"
69 #include "fflibutil.h"
85 static const char *res2bb_notermini(const char *name,
86 int nrr, const rtprename_t *rr)
88 /* NOTE: This function returns the main building block name,
89 * it does not take terminal renaming into account.
94 while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
99 return (i < nrr ? rr[i].main : name);
102 static const char *select_res(int nr, int resnr,
103 const char *name[], const char *expl[],
105 int nrr, const rtprename_t *rr)
109 printf("Which %s type do you want for residue %d\n", title, resnr+1);
110 for (sel = 0; (sel < nr); sel++)
112 printf("%d. %s (%s)\n",
113 sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
115 printf("\nType a number:"); fflush(stdout);
117 if (scanf("%d", &sel) != 1)
119 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
125 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
130 const char *lh[easpNR] = { "ASP", "ASPH" };
131 const char *expl[easpNR] = {
132 "Not protonated (charge -1)",
133 "Protonated (charge 0)"
136 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
139 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
144 const char *lh[egluNR] = { "GLU", "GLUH" };
145 const char *expl[egluNR] = {
146 "Not protonated (charge -1)",
147 "Protonated (charge 0)"
150 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
153 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
158 const char *lh[eglnNR] = { "GLN", "QLN" };
159 const char *expl[eglnNR] = {
160 "Not protonated (charge 0)",
161 "Protonated (charge +1)"
164 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
167 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
172 const char *lh[elysNR] = { "LYSN", "LYS" };
173 const char *expl[elysNR] = {
174 "Not protonated (charge 0)",
175 "Protonated (charge +1)"
178 return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
181 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
186 const char *lh[eargNR] = { "ARGN", "ARG" };
187 const char *expl[eargNR] = {
188 "Not protonated (charge 0)",
189 "Protonated (charge +1)"
192 return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
195 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
197 const char *expl[ehisNR] = {
204 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
207 static void read_rtprename(const char *fname, FILE *fp,
208 int *nrtprename, rtprename_t **rtprename)
210 char line[STRLEN], buf[STRLEN];
219 while (get_a_line(fp, line, STRLEN))
222 nc = sscanf(line, "%s %s %s %s %s %s",
223 rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
226 if (nc != 2 && nc != 5)
228 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
234 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
239 /* This file does not have special termini names, copy them from main */
240 strcpy(rr[n].nter, rr[n].main);
241 strcpy(rr[n].cter, rr[n].main);
242 strcpy(rr[n].bter, rr[n].main);
252 static char *search_resrename(int nrr, rtprename_t *rr,
254 gmx_bool bStart, gmx_bool bEnd,
255 gmx_bool bCompareFFRTPname)
263 while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx) != 0) ||
264 ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
269 /* If found in the database, rename this residue's rtp building block,
270 * otherwise keep the old name.
293 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
301 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
302 int nrr, rtprename_t *rr, t_symtab *symtab,
306 gmx_bool bStart, bEnd;
308 gmx_bool bFFRTPTERRNM;
310 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
312 for (r = 0; r < pdba->nres; r++)
316 for (j = 0; j < nterpairs; j++)
323 for (j = 0; j < nterpairs; j++)
331 nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
333 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
335 /* This is a terminal residue, but the residue name,
336 * currently stored in .rtp, is not a standard residue name,
337 * but probably a force field specific rtp name.
338 * Check if we need to rename it because it is terminal.
340 nn = search_resrename(nrr, rr,
341 *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
344 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
348 printf("Changing rtp entry of residue %d %s to '%s'\n",
349 pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
351 pdba->resinfo[r].rtp = put_symtab(symtab, nn);
356 static void pdbres_to_gmxrtp(t_atoms *pdba)
360 for (i = 0; (i < pdba->nres); i++)
362 if (pdba->resinfo[i].rtp == NULL)
364 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
369 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
370 gmx_bool bFullCompare, t_symtab *symtab)
375 for (i = 0; (i < pdba->nres); i++)
377 resnm = *pdba->resinfo[i].name;
378 if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
379 (!bFullCompare && strstr(resnm, oldnm) != NULL))
381 /* Rename the residue name (not the rtp name) */
382 pdba->resinfo[i].name = put_symtab(symtab, newnm);
387 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
388 gmx_bool bFullCompare, t_symtab *symtab)
393 for (i = 0; (i < pdba->nres); i++)
395 /* We have not set the rtp name yes, use the residue name */
396 bbnm = *pdba->resinfo[i].name;
397 if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
398 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
400 /* Change the rtp builing block name */
401 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
406 static void rename_bbint(t_atoms *pdba, const char *oldnm,
407 const char *gettp(int, int, const rtprename_t *),
408 gmx_bool bFullCompare,
410 int nrr, const rtprename_t *rr)
416 for (i = 0; i < pdba->nres; i++)
418 /* We have not set the rtp name yes, use the residue name */
419 bbnm = *pdba->resinfo[i].name;
420 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
421 (!bFullCompare && strstr(bbnm, oldnm) != NULL))
423 ptr = gettp(i, nrr, rr);
424 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
429 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
435 ftp = fn2ftp(filename);
436 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
438 fprintf(stderr, "No occupancies in %s\n", filename);
442 for (i = 0; (i < atoms->nr); i++)
444 if (atoms->pdbinfo[i].occup != 1)
448 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
449 *atoms->resinfo[atoms->atom[i].resind].name,
450 atoms->resinfo[ atoms->atom[i].resind].nr,
452 atoms->pdbinfo[i].occup);
454 if (atoms->pdbinfo[i].occup == 0)
464 if (nzero == atoms->nr)
466 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
468 else if ((nzero > 0) || (nnotone > 0))
472 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
473 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
475 nzero, nnotone, atoms->nr);
479 fprintf(stderr, "All occupancies are one\n");
484 void write_posres(char *fn, t_atoms *pdba, real fc)
489 fp = gmx_fio_fopen(fn, "w");
491 "; In this topology include file, you will find position restraint\n"
492 "; entries for all the heavy atoms in your original pdb file.\n"
493 "; This means that all the protons which were added by pdb2gmx are\n"
494 "; not restrained.\n"
496 "[ position_restraints ]\n"
497 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
499 for (i = 0; (i < pdba->nr); i++)
501 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
503 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
509 static int read_pdball(const char *inf, const char *outf, char *title,
510 t_atoms *atoms, rvec **x,
511 int *ePBC, matrix box, gmx_bool bRemoveH,
512 t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
513 gmx_atomprop_t aps, gmx_bool bVerbose)
514 /* Read a pdb file. (containing proteins) */
516 int natom, new_natom, i;
519 printf("Reading %s...\n", inf);
520 get_stx_coordnum(inf, &natom);
521 init_t_atoms(atoms, natom, TRUE);
523 read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
524 if (fn2ftp(inf) == efPDB)
526 get_pdb_atomnumber(atoms, aps);
531 for (i = 0; i < atoms->nr; i++)
533 if (!is_hydrogen(*atoms->atomname[i]))
535 atoms->atom[new_natom] = atoms->atom[i];
536 atoms->atomname[new_natom] = atoms->atomname[i];
537 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
538 copy_rvec((*x)[i], (*x)[new_natom]);
542 atoms->nr = new_natom;
547 if (title && title[0])
549 printf(" '%s',", title);
551 printf(" %d atoms\n", natom);
553 /* Rename residues */
554 rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
555 rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
556 rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
558 rename_atoms("xlateat.dat", NULL,
559 atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
568 write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
574 void process_chain(t_atoms *pdba, rvec *x,
575 gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
576 gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
577 gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
578 real angle, real distance, t_symtab *symtab,
579 int nrr, const rtprename_t *rr)
581 /* Rename aromatics, lys, asp and histidine */
584 rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
588 rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
592 rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
596 rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
600 rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
604 rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
608 rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
612 rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
616 rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
620 rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
625 set_histp(pdba, x, angle, distance);
629 rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
632 /* Initialize the rtp builing block names with the residue names
633 * for the residues that have not been processed above.
635 pdbres_to_gmxrtp(pdba);
637 /* Now we have all rtp names set.
638 * The rtp names will conform to Gromacs naming,
639 * unless the input pdb file contained one or more force field specific
640 * rtp names as residue names.
644 /* struct for sorting the atoms from the pdb file */
646 int resnr; /* residue number */
647 int j; /* database order index */
648 int index; /* original atom number */
649 char anm1; /* second letter of atom name */
650 char altloc; /* alternate location indicator */
653 int pdbicomp(const void *a, const void *b)
658 pa = (t_pdbindex *)a;
659 pb = (t_pdbindex *)b;
661 d = (pa->resnr - pb->resnr);
667 d = (pa->anm1 - pb->anm1);
670 d = (pa->altloc - pb->altloc);
678 static void sort_pdbatoms(t_restp restp[],
679 int natoms, t_atoms **pdbaptr, rvec **x,
680 t_blocka *block, char ***gnames)
682 t_atoms *pdba, *pdbnew;
697 for (i = 0; i < natoms; i++)
699 atomnm = *pdba->atomname[i];
700 rptr = &restp[pdba->atom[i].resind];
701 for (j = 0; (j < rptr->natom); j++)
703 if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
708 if (j == rptr->natom)
713 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
714 "while sorting atoms.\n%s", atomnm,
715 *pdba->resinfo[pdba->atom[i].resind].name,
716 pdba->resinfo[pdba->atom[i].resind].nr,
719 is_hydrogen(atomnm) ?
720 "\nFor a hydrogen, this can be a different protonation state, or it\n"
721 "might have had a different number in the PDB file and was rebuilt\n"
722 "(it might for instance have been H3, and we only expected H1 & H2).\n"
723 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
724 "Remove this hydrogen or choose a different protonation state to solve it.\n"
725 "Option -ignh will ignore all hydrogens in the input." : ".");
726 gmx_fatal(FARGS, buf);
728 /* make shadow array to be sorted into indexgroup */
729 pdbi[i].resnr = pdba->atom[i].resind;
732 pdbi[i].anm1 = atomnm[1];
733 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
735 qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
737 /* pdba is sorted in pdbnew using the pdbi index */
740 init_t_atoms(pdbnew, natoms, TRUE);
742 pdbnew->nr = pdba->nr;
743 pdbnew->nres = pdba->nres;
744 sfree(pdbnew->resinfo);
745 pdbnew->resinfo = pdba->resinfo;
746 for (i = 0; i < natoms; i++)
748 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
749 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
750 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
751 copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
752 /* make indexgroup in block */
753 a[i] = pdbi[i].index;
756 sfree(pdba->atomname);
758 sfree(pdba->pdbinfo);
761 /* copy the sorted pdbnew back to pdba */
764 add_grp(block, gnames, natoms, a, "prot_sort");
770 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
772 int i, j, oldnatoms, ndel;
775 printf("Checking for duplicate atoms....\n");
776 oldnatoms = pdba->nr;
778 /* NOTE: pdba->nr is modified inside the loop */
779 for (i = 1; (i < pdba->nr); i++)
781 /* compare 'i' and 'i-1', throw away 'i' if they are identical
782 this is a 'while' because multiple alternate locations can be present */
783 while ( (i < pdba->nr) &&
784 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
785 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
790 ri = &pdba->resinfo[pdba->atom[i].resind];
791 printf("deleting duplicate atom %4s %s%4d%c",
792 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
793 if (ri->chainid && (ri->chainid != ' '))
795 printf(" ch %c", ri->chainid);
799 if (pdba->pdbinfo[i].atomnr)
801 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
803 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
805 printf(" altloc %c", pdba->pdbinfo[i].altloc);
811 /* We can not free, since it might be in the symtab */
812 /* sfree(pdba->atomname[i]); */
813 for (j = i; j < pdba->nr; j++)
815 pdba->atom[j] = pdba->atom[j+1];
816 pdba->atomname[j] = pdba->atomname[j+1];
817 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
818 copy_rvec(x[j+1], x[j]);
820 srenew(pdba->atom, pdba->nr);
821 /* srenew(pdba->atomname, pdba->nr); */
822 srenew(pdba->pdbinfo, pdba->nr);
825 if (pdba->nr != oldnatoms)
827 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
833 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
836 const char *p_startrestype;
837 const char *p_restype;
838 int nstartwarn, nendwarn;
846 /* Find the starting terminus (typially N or 5') */
847 for (i = r0; i < r1 && *r_start == -1; i++)
849 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
850 if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
852 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
859 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
863 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
871 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
872 for (i = *r_start; i < r1; i++)
874 gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
875 if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
883 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
884 *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
885 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
889 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
898 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
904 modify_chain_numbers(t_atoms * pdba,
905 const char * chainsep)
908 char old_prev_chainid;
909 char old_this_chainid;
910 int old_prev_chainnum;
911 int old_this_chainnum;
917 const char * prev_atomname;
918 const char * this_atomname;
919 const char * prev_resname;
920 const char * this_resname;
925 int prev_chainnumber;
926 int this_chainnumber;
938 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
940 /* Be a bit flexible to catch typos */
941 if (!strncmp(chainsep, "id_o", 4))
943 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
944 splitting = SPLIT_ID_OR_TER;
945 printf("Splitting chemical chains based on TER records or chain id changing.\n");
947 else if (!strncmp(chainsep, "int", 3))
949 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
950 splitting = SPLIT_INTERACTIVE;
951 printf("Splitting chemical chains interactively.\n");
953 else if (!strncmp(chainsep, "id_a", 4))
955 splitting = SPLIT_ID_AND_TER;
956 printf("Splitting chemical chains based on TER records and chain id changing.\n");
958 else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
960 splitting = SPLIT_ID_ONLY;
961 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
963 else if (chainsep[0] == 't')
965 splitting = SPLIT_TER_ONLY;
966 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
970 gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
973 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
975 old_prev_chainid = '?';
976 old_prev_chainnum = -1;
979 this_atomname = NULL;
984 this_chainnumber = -1;
986 for (i = 0; i < pdba->nres; i++)
988 ri = &pdba->resinfo[i];
989 old_this_chainid = ri->chainid;
990 old_this_chainnum = ri->chainnum;
992 prev_atomname = this_atomname;
993 prev_atomnum = this_atomnum;
994 prev_resname = this_resname;
995 prev_resnum = this_resnum;
996 prev_chainid = this_chainid;
997 prev_chainnumber = this_chainnumber;
999 this_atomname = *(pdba->atomname[i]);
1000 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1001 this_resname = *ri->name;
1002 this_resnum = ri->nr;
1003 this_chainid = ri->chainid;
1004 this_chainnumber = ri->chainnum;
1008 case SPLIT_ID_OR_TER:
1009 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1015 case SPLIT_ID_AND_TER:
1016 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1023 if (old_this_chainid != old_prev_chainid)
1029 case SPLIT_TER_ONLY:
1030 if (old_this_chainnum != old_prev_chainnum)
1035 case SPLIT_INTERACTIVE:
1036 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1040 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1041 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1042 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1043 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1045 if (NULL == fgets(select, STRLEN-1, stdin))
1047 gmx_fatal(FARGS, "Error reading from stdin");
1050 if (i == 0 || select[0] == 'y')
1057 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1060 old_prev_chainid = old_this_chainid;
1061 old_prev_chainnum = old_this_chainnum;
1063 ri->chainnum = new_chainnum;
1092 int gmx_pdb2gmx(int argc, char *argv[])
1094 const char *desc[] = {
1095 "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1096 "some database files, adds hydrogens to the molecules and generates",
1097 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1098 "and a topology in GROMACS format.",
1099 "These files can subsequently be processed to generate a run input file.",
1101 "[THISMODULE] will search for force fields by looking for",
1102 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1103 "of the current working directory and of the GROMACS library directory",
1104 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1106 "By default the forcefield selection is interactive,",
1107 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1108 "in the list on the command line instead. In that case [THISMODULE] just looks",
1109 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1111 "After choosing a force field, all files will be read only from",
1112 "the corresponding force field directory.",
1113 "If you want to modify or add a residue types, you can copy the force",
1114 "field directory from the GROMACS library directory to your current",
1115 "working directory. If you want to add new protein residue types,",
1116 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1117 "or copy the whole library directory to a local directory and set",
1118 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1119 "Check Chapter 5 of the manual for more information about file formats.",
1122 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1123 "need not necessarily contain a protein structure. Every kind of",
1124 "molecule for which there is support in the database can be converted.",
1125 "If there is no support in the database, you can add it yourself.[PAR]",
1127 "The program has limited intelligence, it reads a number of database",
1128 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1129 "if necessary this can be done manually. The program can prompt the",
1130 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1131 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1132 "protonated (three protons, default), for Asp and Glu unprotonated",
1133 "(default) or protonated, for His the proton can be either on ND1,",
1134 "on NE2 or on both. By default these selections are done automatically.",
1135 "For His, this is based on an optimal hydrogen bonding",
1136 "conformation. Hydrogen bonds are defined based on a simple geometric",
1137 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1138 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1139 "[TT]-dist[tt] respectively.[PAR]",
1141 "The protonation state of N- and C-termini can be chosen interactively",
1142 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1143 "respectively. Some force fields support zwitterionic forms for chains of",
1144 "one residue, but for polypeptides these options should NOT be selected.",
1145 "The AMBER force fields have unique forms for the terminal residues,",
1146 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1147 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1148 "respectively to use these forms, making sure you preserve the format",
1149 "of the coordinate file. Alternatively, use named terminating residues",
1150 "(e.g. ACE, NME).[PAR]",
1152 "The separation of chains is not entirely trivial since the markup",
1153 "in user-generated PDB files frequently varies and sometimes it",
1154 "is desirable to merge entries across a TER record, for instance",
1155 "if you want a disulfide bridge or distance restraints between",
1156 "two protein chains or if you have a HEME group bound to a protein.",
1157 "In such cases multiple chains should be contained in a single",
1158 "[TT]moleculetype[tt] definition.",
1159 "To handle this, [THISMODULE] uses two separate options.",
1160 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1161 "start, and termini added when applicable. This can be done based on the",
1162 "existence of TER records, when the chain id changes, or combinations of either",
1163 "or both of these. You can also do the selection fully interactively.",
1164 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1165 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1166 "This can be turned off (no merging), all non-water chains can be merged into a",
1167 "single molecule, or the selection can be done interactively.[PAR]",
1169 "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1170 "If any of the occupancies are not one, indicating that the atom is",
1171 "not resolved well in the structure, a warning message is issued.",
1172 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1173 "all occupancy fields may be zero. Either way, it is up to the user",
1174 "to verify the correctness of the input data (read the article!).[PAR]",
1176 "During processing the atoms will be reordered according to GROMACS",
1177 "conventions. With [TT]-n[tt] an index file can be generated that",
1178 "contains one group reordered in the same way. This allows you to",
1179 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1180 "one limitation: reordering is done after the hydrogens are stripped",
1181 "from the input and before new hydrogens are added. This means that",
1182 "you should not use [TT]-ignh[tt].[PAR]",
1184 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1185 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1186 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1189 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1190 "motions. Angular and out-of-plane motions can be removed by changing",
1191 "hydrogens into virtual sites and fixing angles, which fixes their",
1192 "position relative to neighboring atoms. Additionally, all atoms in the",
1193 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1194 "can be converted into virtual sites, eliminating the fast improper dihedral",
1195 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1196 "atoms are also converted to virtual sites. The mass of all atoms that are",
1197 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1198 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1199 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1200 "done for water hydrogens to slow down the rotational motion of water.",
1201 "The increase in mass of the hydrogens is subtracted from the bonded",
1202 "(heavy) atom so that the total mass of the system remains the same."
1206 FILE *fp, *top_file, *top_file2, *itp_file = NULL;
1208 t_atoms pdba_all, *pdba;
1212 int chain, nch, maxch, nwaterchain;
1214 t_chain *chains, *cc;
1215 char select[STRLEN];
1223 int i, j, k, l, nrtp;
1224 int *swap_index, si;
1228 gpp_atomtype_t atype;
1229 gmx_residuetype_t rt;
1231 char fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1232 char molname[STRLEN], title[STRLEN], quote[STRLEN];
1233 char *c, forcefield[STRLEN], ffdir[STRLEN];
1234 char ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1242 int nrtprename, naa;
1243 rtprename_t *rtprename = NULL;
1244 int nah, nNtdb, nCtdb, ntdblist;
1245 t_hackblock *ntdb, *ctdb, **tdblist;
1249 gmx_bool bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1251 t_hackblock *hb_chain;
1252 t_restp *restp_chain;
1254 const char *p_restype;
1258 const char * prev_atomname;
1259 const char * this_atomname;
1260 const char * prev_resname;
1261 const char * this_resname;
1266 int prev_chainnumber;
1267 int this_chainnumber;
1269 int this_chainstart;
1270 int prev_chainstart;
1277 { efSTX, "-f", "eiwit.pdb", ffREAD },
1278 { efSTO, "-o", "conf", ffWRITE },
1279 { efTOP, NULL, NULL, ffWRITE },
1280 { efITP, "-i", "posre", ffWRITE },
1281 { efNDX, "-n", "clean", ffOPTWR },
1282 { efSTO, "-q", "clean.pdb", ffOPTWR }
1284 #define NFILE asize(fnm)
1287 /* Command line arguments must be static */
1288 static gmx_bool bNewRTP = FALSE;
1289 static gmx_bool bInter = FALSE, bCysMan = FALSE;
1290 static gmx_bool bLysMan = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1291 static gmx_bool bGlnMan = FALSE, bArgMan = FALSE;
1292 static gmx_bool bTerMan = FALSE, bUnA = FALSE, bHeavyH;
1293 static gmx_bool bSort = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1294 static gmx_bool bDeuterate = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1295 static gmx_bool bRenumRes = FALSE, bRTPresname = FALSE;
1296 static real angle = 135.0, distance = 0.3, posre_fc = 1000;
1297 static real long_bond_dist = 0.25, short_bond_dist = 0.05;
1298 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1299 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1300 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1301 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1302 static const char *ff = "select";
1305 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1306 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1307 { "-lb", FALSE, etREAL, {&long_bond_dist},
1308 "HIDDENLong bond warning distance" },
1309 { "-sb", FALSE, etREAL, {&short_bond_dist},
1310 "HIDDENShort bond warning distance" },
1311 { "-chainsep", FALSE, etENUM, {chainsep},
1312 "Condition in PDB files when a new chain should be started (adding termini)" },
1313 { "-merge", FALSE, etENUM, {&merge},
1314 "Merge multiple chains into a single [moleculetype]" },
1315 { "-ff", FALSE, etSTR, {&ff},
1316 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1317 { "-water", FALSE, etENUM, {watstr},
1318 "Water model to use" },
1319 { "-inter", FALSE, etBOOL, {&bInter},
1320 "Set the next 8 options to interactive"},
1321 { "-ss", FALSE, etBOOL, {&bCysMan},
1322 "Interactive SS bridge selection" },
1323 { "-ter", FALSE, etBOOL, {&bTerMan},
1324 "Interactive termini selection, instead of charged (default)" },
1325 { "-lys", FALSE, etBOOL, {&bLysMan},
1326 "Interactive lysine selection, instead of charged" },
1327 { "-arg", FALSE, etBOOL, {&bArgMan},
1328 "Interactive arginine selection, instead of charged" },
1329 { "-asp", FALSE, etBOOL, {&bAspMan},
1330 "Interactive aspartic acid selection, instead of charged" },
1331 { "-glu", FALSE, etBOOL, {&bGluMan},
1332 "Interactive glutamic acid selection, instead of charged" },
1333 { "-gln", FALSE, etBOOL, {&bGlnMan},
1334 "Interactive glutamine selection, instead of neutral" },
1335 { "-his", FALSE, etBOOL, {&bHisMan},
1336 "Interactive histidine selection, instead of checking H-bonds" },
1337 { "-angle", FALSE, etREAL, {&angle},
1338 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1339 { "-dist", FALSE, etREAL, {&distance},
1340 "Maximum donor-acceptor distance for a H-bond (nm)" },
1341 { "-una", FALSE, etBOOL, {&bUnA},
1342 "Select aromatic rings with united CH atoms on phenylalanine, "
1343 "tryptophane and tyrosine" },
1344 { "-sort", FALSE, etBOOL, {&bSort},
1345 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1346 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1347 "Ignore hydrogen atoms that are in the coordinate file" },
1348 { "-missing", FALSE, etBOOL, {&bAllowMissing},
1349 "Continue when atoms are missing, dangerous" },
1350 { "-v", FALSE, etBOOL, {&bVerbose},
1351 "Be slightly more verbose in messages" },
1352 { "-posrefc", FALSE, etREAL, {&posre_fc},
1353 "Force constant for position restraints" },
1354 { "-vsite", FALSE, etENUM, {vsitestr},
1355 "Convert atoms to virtual sites" },
1356 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1357 "Make hydrogen atoms heavy" },
1358 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1359 "Change the mass of hydrogens to 2 amu" },
1360 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1361 "Use charge groups in the [TT].rtp[tt] file" },
1362 { "-cmap", TRUE, etBOOL, {&bCmap},
1363 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1364 { "-renum", TRUE, etBOOL, {&bRenumRes},
1365 "Renumber the residues consecutively in the output" },
1366 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1367 "Use [TT].rtp[tt] entry names as residue names" }
1369 #define NPARGS asize(pa)
1371 if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1377 /* Force field selection, interactive or direct */
1378 choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1379 forcefield, sizeof(forcefield),
1380 ffdir, sizeof(ffdir));
1382 if (strlen(forcefield) > 0)
1384 strcpy(ffname, forcefield);
1385 ffname[0] = toupper(ffname[0]);
1389 gmx_fatal(FARGS, "Empty forcefield string");
1392 printf("\nUsing the %s force field in directory %s\n\n",
1395 choose_watermodel(watstr[0], ffdir, &watermodel);
1399 /* if anything changes here, also change description of -inter */
1414 else if (bDeuterate)
1423 switch (vsitestr[0][0])
1425 case 'n': /* none */
1427 bVsiteAromatics = FALSE;
1429 case 'h': /* hydrogens */
1431 bVsiteAromatics = FALSE;
1433 case 'a': /* aromatics */
1435 bVsiteAromatics = TRUE;
1438 gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1439 __FILE__, __LINE__, vsitestr[0]);
1442 /* Open the symbol table */
1443 open_symtab(&symtab);
1445 /* Residue type database */
1446 gmx_residuetype_init(&rt);
1448 /* Read residue renaming database(s), if present */
1449 nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1453 for (i = 0; i < nrrn; i++)
1455 fp = fflib_open(rrn[i]);
1456 read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1462 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1464 for (i = 0; i < nrtprename; i++)
1466 rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1468 /* Only add names if the 'standard' gromacs/iupac base name was found */
1471 gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1472 gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1473 gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1474 gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1479 if (watermodel != NULL && (strstr(watermodel, "4p") ||
1480 strstr(watermodel, "4P")))
1484 else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1485 strstr(watermodel, "5P")))
1494 aps = gmx_atomprop_init();
1495 natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1496 &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1501 gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1504 printf("Analyzing pdb file\n");
1509 modify_chain_numbers(&pdba_all, chainsep[0]);
1513 this_atomname = NULL;
1515 this_resname = NULL;
1518 this_chainnumber = -1;
1519 this_chainstart = 0;
1520 /* Keep the compiler happy */
1521 prev_chainstart = 0;
1526 for (i = 0; (i < natom); i++)
1528 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1530 prev_atomname = this_atomname;
1531 prev_atomnum = this_atomnum;
1532 prev_resname = this_resname;
1533 prev_resnum = this_resnum;
1534 prev_chainid = this_chainid;
1535 prev_chainnumber = this_chainnumber;
1538 prev_chainstart = this_chainstart;
1541 this_atomname = *pdba_all.atomname[i];
1542 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1543 this_resname = *ri->name;
1544 this_resnum = ri->nr;
1545 this_chainid = ri->chainid;
1546 this_chainnumber = ri->chainnum;
1548 bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1549 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1551 this_chainstart = pdba_all.atom[i].resind;
1556 if (!strncmp(merge[0], "int", 3))
1558 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1559 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1560 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1561 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1563 if (NULL == fgets(select, STRLEN-1, stdin))
1565 gmx_fatal(FARGS, "Error reading from stdin");
1567 bMerged = (select[0] == 'y');
1569 else if (!strncmp(merge[0], "all", 3))
1577 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1578 pdba_all.atom[i].resind - prev_chainstart;
1579 pdb_ch[nch-1].nterpairs++;
1580 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1585 /* set natom for previous chain */
1588 pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1595 /* check if chain identifier was used before */
1596 for (j = 0; (j < nch); j++)
1598 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1600 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1601 "They will be treated as separate chains unless you reorder your file.\n",
1608 srenew(pdb_ch, maxch);
1610 pdb_ch[nch].chainid = ri->chainid;
1611 pdb_ch[nch].chainnum = ri->chainnum;
1612 pdb_ch[nch].start = i;
1613 pdb_ch[nch].bAllWat = bWat;
1616 pdb_ch[nch].nterpairs = 0;
1620 pdb_ch[nch].nterpairs = 1;
1622 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1623 /* modified [nch] to [0] below */
1624 pdb_ch[nch].chainstart[0] = 0;
1630 pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1632 /* set all the water blocks at the end of the chain */
1633 snew(swap_index, nch);
1635 for (i = 0; i < nch; i++)
1637 if (!pdb_ch[i].bAllWat)
1643 for (i = 0; i < nch; i++)
1645 if (pdb_ch[i].bAllWat)
1651 if (nwaterchain > 1)
1653 printf("Moved all the water blocks to the end\n");
1657 /* copy pdb data and x for all chains */
1658 for (i = 0; (i < nch); i++)
1661 chains[i].chainid = pdb_ch[si].chainid;
1662 chains[i].chainnum = pdb_ch[si].chainnum;
1663 chains[i].bAllWat = pdb_ch[si].bAllWat;
1664 chains[i].nterpairs = pdb_ch[si].nterpairs;
1665 chains[i].chainstart = pdb_ch[si].chainstart;
1666 snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1667 snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1668 snew(chains[i].r_start, pdb_ch[si].nterpairs);
1669 snew(chains[i].r_end, pdb_ch[si].nterpairs);
1671 snew(chains[i].pdba, 1);
1672 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1673 snew(chains[i].x, chains[i].pdba->nr);
1674 for (j = 0; j < chains[i].pdba->nr; j++)
1676 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1677 snew(chains[i].pdba->atomname[j], 1);
1678 *chains[i].pdba->atomname[j] =
1679 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1680 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1681 copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1683 /* Re-index the residues assuming that the indices are continuous */
1684 k = chains[i].pdba->atom[0].resind;
1685 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1686 chains[i].pdba->nres = nres;
1687 for (j = 0; j < chains[i].pdba->nr; j++)
1689 chains[i].pdba->atom[j].resind -= k;
1691 srenew(chains[i].pdba->resinfo, nres);
1692 for (j = 0; j < nres; j++)
1694 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1695 snew(chains[i].pdba->resinfo[j].name, 1);
1696 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1697 /* make all chain identifiers equal to that of the chain */
1698 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1702 if (nchainmerges > 0)
1704 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1708 printf("There are %d chains and %d blocks of water and "
1709 "%d residues with %d atoms\n",
1710 nch-nwaterchain, nwaterchain,
1711 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr, natom);
1713 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1714 for (i = 0; (i < nch); i++)
1716 printf(" %d '%c' %5d %6d %s\n",
1717 i+1, chains[i].chainid ? chains[i].chainid : '-',
1718 chains[i].pdba->nres, chains[i].pdba->nr,
1719 chains[i].bAllWat ? "(only water)" : "");
1723 check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1725 /* Read atomtypes... */
1726 atype = read_atype(ffdir, &symtab);
1728 /* read residue database */
1729 printf("Reading residue database... (%s)\n", forcefield);
1730 nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1733 for (i = 0; i < nrtpf; i++)
1735 read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1741 /* Not correct with multiple rtp input files with different bonded types */
1742 fp = gmx_fio_fopen("new.rtp", "w");
1743 print_resall(fp, nrtp, restp, atype);
1747 /* read hydrogen database */
1748 nah = read_h_db(ffdir, &ah);
1750 /* Read Termini database... */
1751 nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1752 nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1754 top_fn = ftp2fn(efTOP, NFILE, fnm);
1755 top_file = gmx_fio_fopen(top_fn, "w");
1757 print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1764 for (chain = 0; (chain < nch); chain++)
1766 cc = &(chains[chain]);
1768 /* set pdba, natom and nres to the current chain */
1771 natom = cc->pdba->nr;
1772 nres = cc->pdba->nres;
1774 if (cc->chainid && ( cc->chainid != ' ' ) )
1776 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1777 chain+1, cc->chainid, natom, nres);
1781 printf("Processing chain %d (%d atoms, %d residues)\n",
1782 chain+1, natom, nres);
1785 process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1786 bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1787 nrtprename, rtprename);
1789 cc->chainstart[cc->nterpairs] = pdba->nres;
1791 for (i = 0; i < cc->nterpairs; i++)
1793 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1794 &(cc->r_start[j]), &(cc->r_end[j]), rt);
1796 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1802 if (cc->nterpairs == 0)
1804 printf("Problem with chain definition, or missing terminal residues.\n"
1805 "This chain does not appear to contain a recognized chain molecule.\n"
1806 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1809 /* Check for disulfides and other special bonds */
1810 nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1814 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1822 sprintf(fn, "chain.pdb");
1826 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1828 write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1832 for (i = 0; i < cc->nterpairs; i++)
1836 * We first apply a filter so we only have the
1837 * termini that can be applied to the residue in question
1838 * (or a generic terminus if no-residue specific is available).
1840 /* First the N terminus */
1843 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1844 *pdba->resinfo[cc->r_start[i]].name,
1845 *pdba->resinfo[cc->r_start[i]].rtp,
1849 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1850 "is already in a terminus-specific form and skipping terminus selection.\n");
1855 if (bTerMan && ntdblist > 1)
1857 sprintf(select, "Select start terminus type for %s-%d",
1858 *pdba->resinfo[cc->r_start[i]].name,
1859 pdba->resinfo[cc->r_start[i]].nr);
1860 cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1864 cc->ntdb[i] = tdblist[0];
1867 printf("Start terminus %s-%d: %s\n",
1868 *pdba->resinfo[cc->r_start[i]].name,
1869 pdba->resinfo[cc->r_start[i]].nr,
1870 (cc->ntdb[i])->name);
1879 /* And the C terminus */
1882 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1883 *pdba->resinfo[cc->r_end[i]].name,
1884 *pdba->resinfo[cc->r_end[i]].rtp,
1888 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1889 "is already in a terminus-specific form and skipping terminus selection.\n");
1894 if (bTerMan && ntdblist > 1)
1896 sprintf(select, "Select end terminus type for %s-%d",
1897 *pdba->resinfo[cc->r_end[i]].name,
1898 pdba->resinfo[cc->r_end[i]].nr);
1899 cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1903 cc->ctdb[i] = tdblist[0];
1905 printf("End terminus %s-%d: %s\n",
1906 *pdba->resinfo[cc->r_end[i]].name,
1907 pdba->resinfo[cc->r_end[i]].nr,
1908 (cc->ctdb[i])->name);
1917 /* lookup hackblocks and rtp for all residues */
1918 get_hackblocks_rtp(&hb_chain, &restp_chain,
1919 nrtp, restp, pdba->nres, pdba->resinfo,
1920 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1921 /* ideally, now we would not need the rtp itself anymore, but do
1922 everything using the hb and restp arrays. Unfortunately, that
1923 requires some re-thinking of code in gen_vsite.c, which I won't
1924 do now :( AF 26-7-99 */
1926 rename_atoms(NULL, ffdir,
1927 pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1929 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1933 block = new_blocka();
1935 sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1936 natom = remove_duplicate_atoms(pdba, x, bVerbose);
1937 if (ftp2bSet(efNDX, NFILE, fnm))
1941 fprintf(stderr, "WARNING: with the -remh option the generated "
1942 "index file (%s) might be useless\n"
1943 "(the index file is generated before hydrogens are added)",
1944 ftp2fn(efNDX, NFILE, fnm));
1946 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames);
1948 for (i = 0; i < block->nr; i++)
1957 fprintf(stderr, "WARNING: "
1958 "without sorting no check for duplicate atoms can be done\n");
1961 /* Generate Hydrogen atoms (and termini) in the sequence */
1962 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1963 natom = add_h(&pdba, &x, nah, ah,
1964 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1965 NULL, NULL, TRUE, FALSE);
1966 printf("Now there are %d residues with %d atoms\n",
1967 pdba->nres, pdba->nr);
1970 write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1975 for (i = 0; (i < natom); i++)
1977 fprintf(debug, "Res %s%d atom %d %s\n",
1978 *(pdba->resinfo[pdba->atom[i].resind].name),
1979 pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1983 strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1985 /* make up molecule name(s) */
1987 k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1989 gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
1995 sprintf(molname, "Water");
1999 this_chainid = cc->chainid;
2001 /* Add the chain id if we have one */
2002 if (this_chainid != ' ')
2004 sprintf(buf, "_chain_%c", this_chainid);
2005 strcat(suffix, buf);
2008 /* Check if there have been previous chains with the same id */
2010 for (k = 0; k < chain; k++)
2012 if (cc->chainid == chains[k].chainid)
2017 /* Add the number for this chain identifier if there are multiple copies */
2021 sprintf(buf, "%d", nid_used+1);
2022 strcat(suffix, buf);
2025 if (strlen(suffix) > 0)
2027 sprintf(molname, "%s%s", p_restype, suffix);
2031 strcpy(molname, p_restype);
2035 if ((nch-nwaterchain > 1) && !cc->bAllWat)
2038 strcpy(itp_fn, top_fn);
2039 printf("Chain time...\n");
2040 c = strrchr(itp_fn, '.');
2041 sprintf(c, "_%s.itp", molname);
2042 c = strrchr(posre_fn, '.');
2043 sprintf(c, "_%s.itp", molname);
2044 if (strcmp(itp_fn, posre_fn) == 0)
2046 strcpy(buf_fn, posre_fn);
2047 c = strrchr(buf_fn, '.');
2049 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2053 srenew(incls, nincl);
2054 incls[nincl-1] = strdup(itp_fn);
2055 itp_file = gmx_fio_fopen(itp_fn, "w");
2062 srenew(mols, nmol+1);
2065 mols[nmol].name = strdup("SOL");
2066 mols[nmol].nr = pdba->nres;
2070 mols[nmol].name = strdup(molname);
2077 print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2087 top_file2 = itp_file;
2091 top_file2 = top_file;
2094 pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2096 restp_chain, hb_chain,
2098 bVsites, bVsiteAromatics, ffdir,
2099 mHmult, nssbonds, ssbonds,
2100 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2101 bRenumRes, bRTPresname);
2105 write_posres(posre_fn, pdba, posre_fc);
2110 gmx_fio_fclose(itp_file);
2113 /* pdba and natom have been reassigned somewhere so: */
2119 if (cc->chainid == ' ')
2121 sprintf(fn, "chain.pdb");
2125 sprintf(fn, "chain_%c.pdb", cc->chainid);
2127 cool_quote(quote, 255, NULL);
2128 write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2132 if (watermodel == NULL)
2134 for (chain = 0; chain < nch; chain++)
2136 if (chains[chain].bAllWat)
2138 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2144 sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2145 if (!fflib_fexist(buf_fn))
2147 gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2148 buf_fn, watermodel);
2152 print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2153 gmx_fio_fclose(top_file);
2155 gmx_residuetype_destroy(rt);
2157 /* now merge all chains back together */
2160 for (i = 0; (i < nch); i++)
2162 natom += chains[i].pdba->nr;
2163 nres += chains[i].pdba->nres;
2166 init_t_atoms(atoms, natom, FALSE);
2167 for (i = 0; i < atoms->nres; i++)
2169 sfree(atoms->resinfo[i].name);
2171 sfree(atoms->resinfo);
2173 snew(atoms->resinfo, nres);
2177 for (i = 0; (i < nch); i++)
2181 printf("Including chain %d in system: %d atoms %d residues\n",
2182 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2184 for (j = 0; (j < chains[i].pdba->nr); j++)
2186 atoms->atom[k] = chains[i].pdba->atom[j];
2187 atoms->atom[k].resind += l; /* l is processed nr of residues */
2188 atoms->atomname[k] = chains[i].pdba->atomname[j];
2189 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2190 copy_rvec(chains[i].x[j], x[k]);
2193 for (j = 0; (j < chains[i].pdba->nres); j++)
2195 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2198 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2206 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2207 print_sums(atoms, TRUE);
2210 fprintf(stderr, "\nWriting coordinate file...\n");
2211 clear_rvec(box_space);
2214 gen_box(0, atoms->nr, x, box, box_space, FALSE);
2216 write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2218 printf("\t\t--------- PLEASE NOTE ------------\n");
2219 printf("You have successfully generated a topology from: %s.\n",
2220 opt2fn("-f", NFILE, fnm));
2221 if (watermodel != NULL)
2223 printf("The %s force field and the %s water model are used.\n",
2224 ffname, watermodel);
2228 printf("The %s force field is used.\n",
2231 printf("\t\t--------- ETON ESAELP ------------\n");