2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team,
6 * check out http://www.gromacs.org for more information.
7 * Copyright (c) 2012,2013, by the GROMACS development team, led by
8 * David van der Spoel, Berk Hess, Erik Lindahl, and including many
9 * others, as listed in the AUTHORS file in the top-level source
10 * directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
55 #include "gmx_fatal.h"
74 #include "fflibutil.h"
86 static const char *res2bb_notermini(const char *name,
87 int nrr,const rtprename_t *rr)
89 /* NOTE: This function returns the main building block name,
90 * it does not take terminal renaming into account.
95 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
99 return (i < nrr ? rr[i].main : name);
102 static const char *select_res(int nr,int resnr,
103 const char *name[],const char *expl[],
105 int nrr,const rtprename_t *rr)
109 printf("Which %s type do you want for residue %d\n",title,resnr+1);
110 for(sel=0; (sel < nr); sel++) {
111 printf("%d. %s (%s)\n",
112 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
114 printf("\nType a number:"); fflush(stdout);
116 if (scanf("%d",&sel) != 1)
117 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
122 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
124 enum { easp, easpH, easpNR };
125 const char *lh[easpNR] = { "ASP", "ASPH" };
126 const char *expl[easpNR] = {
127 "Not protonated (charge -1)",
128 "Protonated (charge 0)"
131 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
134 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
136 enum { eglu, egluH, egluNR };
137 const char *lh[egluNR] = { "GLU", "GLUH" };
138 const char *expl[egluNR] = {
139 "Not protonated (charge -1)",
140 "Protonated (charge 0)"
143 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
146 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
148 enum { egln, eglnH, eglnNR };
149 const char *lh[eglnNR] = { "GLN", "QLN" };
150 const char *expl[eglnNR] = {
151 "Not protonated (charge 0)",
152 "Protonated (charge +1)"
155 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
158 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
160 enum { elys, elysH, elysNR };
161 const char *lh[elysNR] = { "LYSN", "LYS" };
162 const char *expl[elysNR] = {
163 "Not protonated (charge 0)",
164 "Protonated (charge +1)"
167 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
170 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
172 enum { earg, eargH, eargNR };
173 const char *lh[eargNR] = { "ARGN", "ARG" };
174 const char *expl[eargNR] = {
175 "Not protonated (charge 0)",
176 "Protonated (charge +1)"
179 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
182 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
184 const char *expl[ehisNR] = {
191 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
194 static void read_rtprename(const char *fname,FILE *fp,
195 int *nrtprename,rtprename_t **rtprename)
197 char line[STRLEN],buf[STRLEN];
206 while(get_a_line(fp,line,STRLEN)) {
208 nc = sscanf(line,"%s %s %s %s %s %s",
209 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
211 if (nc != 2 && nc != 5) {
212 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
215 } else if (nc != ncol) {
216 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
220 /* This file does not have special termini names, copy them from main */
221 strcpy(rr[n].nter,rr[n].main);
222 strcpy(rr[n].cter,rr[n].main);
223 strcpy(rr[n].bter,rr[n].main);
233 static char *search_resrename(int nrr,rtprename_t *rr,
235 gmx_bool bStart,gmx_bool bEnd,
236 gmx_bool bCompareFFRTPname)
244 while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx) != 0) ||
245 ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
250 /* If found in the database, rename this residue's rtp building block,
251 * otherwise keep the old name.
274 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
282 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
283 int nrr,rtprename_t *rr,t_symtab *symtab,
287 gmx_bool bStart,bEnd;
289 gmx_bool bFFRTPTERRNM;
291 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
293 for(r=0; r<pdba->nres; r++)
297 for(j=0; j<nterpairs; j++)
304 for(j=0; j<nterpairs; j++)
312 nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
314 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
316 /* This is a terminal residue, but the residue name,
317 * currently stored in .rtp, is not a standard residue name,
318 * but probably a force field specific rtp name.
319 * Check if we need to rename it because it is terminal.
321 nn = search_resrename(nrr,rr,
322 *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
325 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
329 printf("Changing rtp entry of residue %d %s to '%s'\n",
330 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
332 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
337 static void pdbres_to_gmxrtp(t_atoms *pdba)
341 for(i=0; (i<pdba->nres); i++)
343 if (pdba->resinfo[i].rtp == NULL)
345 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
350 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
351 gmx_bool bFullCompare,t_symtab *symtab)
356 for(i=0; (i<pdba->nres); i++)
358 resnm = *pdba->resinfo[i].name;
359 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
360 (!bFullCompare && strstr(resnm,oldnm) != NULL))
362 /* Rename the residue name (not the rtp name) */
363 pdba->resinfo[i].name = put_symtab(symtab,newnm);
368 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
369 gmx_bool bFullCompare,t_symtab *symtab)
374 for(i=0; (i<pdba->nres); i++)
376 /* We have not set the rtp name yes, use the residue name */
377 bbnm = *pdba->resinfo[i].name;
378 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
379 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
381 /* Change the rtp builing block name */
382 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
387 static void rename_bbint(t_atoms *pdba,const char *oldnm,
388 const char *gettp(int,int,const rtprename_t *),
389 gmx_bool bFullCompare,
391 int nrr,const rtprename_t *rr)
397 for(i=0; i<pdba->nres; i++)
399 /* We have not set the rtp name yes, use the residue name */
400 bbnm = *pdba->resinfo[i].name;
401 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
402 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
404 ptr = gettp(i,nrr,rr);
405 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
410 static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
416 ftp = fn2ftp(filename);
417 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
418 fprintf(stderr,"No occupancies in %s\n",filename);
420 for(i=0; (i<atoms->nr); i++) {
421 if (atoms->pdbinfo[i].occup != 1) {
423 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
424 *atoms->resinfo[atoms->atom[i].resind].name,
425 atoms->resinfo[ atoms->atom[i].resind].nr,
427 atoms->pdbinfo[i].occup);
428 if (atoms->pdbinfo[i].occup == 0)
434 if (nzero == atoms->nr) {
435 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
436 } else if ((nzero > 0) || (nnotone > 0)) {
439 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
440 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
442 nzero,nnotone,atoms->nr);
444 fprintf(stderr,"All occupancies are one\n");
449 void write_posres(char *fn,t_atoms *pdba,real fc)
454 fp=gmx_fio_fopen(fn,"w");
456 "; In this topology include file, you will find position restraint\n"
457 "; entries for all the heavy atoms in your original pdb file.\n"
458 "; This means that all the protons which were added by pdb2gmx are\n"
459 "; not restrained.\n"
461 "[ position_restraints ]\n"
462 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
464 for(i=0; (i<pdba->nr); i++) {
465 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
466 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
471 static int read_pdball(const char *inf, const char *outf,char *title,
472 t_atoms *atoms, rvec **x,
473 int *ePBC,matrix box, gmx_bool bRemoveH,
474 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
475 gmx_atomprop_t aps,gmx_bool bVerbose)
476 /* Read a pdb file. (containing proteins) */
478 int natom,new_natom,i;
481 printf("Reading %s...\n",inf);
482 get_stx_coordnum(inf,&natom);
483 init_t_atoms(atoms,natom,TRUE);
485 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
486 if (fn2ftp(inf) == efPDB)
487 get_pdb_atomnumber(atoms,aps);
490 for(i=0; i<atoms->nr; i++)
491 if (!is_hydrogen(*atoms->atomname[i])) {
492 atoms->atom[new_natom]=atoms->atom[i];
493 atoms->atomname[new_natom]=atoms->atomname[i];
494 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
495 copy_rvec((*x)[i],(*x)[new_natom]);
503 if (title && title[0])
504 printf(" '%s',",title);
505 printf(" %d atoms\n",natom);
507 /* Rename residues */
508 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
509 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
510 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
512 rename_atoms("xlateat.dat",NULL,
513 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
519 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
524 void process_chain(t_atoms *pdba, rvec *x,
525 gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
526 gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
527 gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
528 real angle,real distance,t_symtab *symtab,
529 int nrr,const rtprename_t *rr)
531 /* Rename aromatics, lys, asp and histidine */
532 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
533 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
534 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
536 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
538 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
540 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
542 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
544 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
546 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
548 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
551 set_histp(pdba,x,angle,distance);
553 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
555 /* Initialize the rtp builing block names with the residue names
556 * for the residues that have not been processed above.
558 pdbres_to_gmxrtp(pdba);
560 /* Now we have all rtp names set.
561 * The rtp names will conform to Gromacs naming,
562 * unless the input pdb file contained one or more force field specific
563 * rtp names as residue names.
567 /* struct for sorting the atoms from the pdb file */
569 int resnr; /* residue number */
570 int j; /* database order index */
571 int index; /* original atom number */
572 char anm1; /* second letter of atom name */
573 char altloc; /* alternate location indicator */
576 int pdbicomp(const void *a,const void *b)
584 d = (pa->resnr - pb->resnr);
588 d = (pa->anm1 - pb->anm1);
590 d = (pa->altloc - pb->altloc);
597 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
598 int natoms,t_atoms **pdbaptr,rvec **x,
599 t_blocka *block,char ***gnames)
601 t_atoms *pdba,*pdbnew;
616 for(i=0; i<natoms; i++)
618 atomnm = *pdba->atomname[i];
619 rptr = &restp[pdba->atom[i].resind];
620 for(j=0; (j<rptr->natom); j++)
622 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
632 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
633 "while sorting atoms.\n%s",atomnm,
634 *pdba->resinfo[pdba->atom[i].resind].name,
635 pdba->resinfo[pdba->atom[i].resind].nr,
638 is_hydrogen(atomnm) ?
639 "\nFor a hydrogen, this can be a different protonation state, or it\n"
640 "might have had a different number in the PDB file and was rebuilt\n"
641 "(it might for instance have been H3, and we only expected H1 & H2).\n"
642 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
643 "Remove this hydrogen or choose a different protonation state to solve it.\n"
644 "Option -ignh will ignore all hydrogens in the input." : ".");
645 gmx_fatal(FARGS,buf);
647 /* make shadow array to be sorted into indexgroup */
648 pdbi[i].resnr = pdba->atom[i].resind;
651 pdbi[i].anm1 = atomnm[1];
652 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
654 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
656 /* pdba is sorted in pdbnew using the pdbi index */
659 init_t_atoms(pdbnew,natoms,TRUE);
662 pdbnew->nres=pdba->nres;
663 sfree(pdbnew->resinfo);
664 pdbnew->resinfo=pdba->resinfo;
665 for (i=0; i<natoms; i++) {
666 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
667 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
668 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
669 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
670 /* make indexgroup in block */
674 sfree(pdba->atomname);
676 sfree(pdba->pdbinfo);
679 /* copy the sorted pdbnew back to pdba */
682 add_grp(block, gnames, natoms, a, "prot_sort");
688 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
690 int i,j,oldnatoms,ndel;
693 printf("Checking for duplicate atoms....\n");
694 oldnatoms = pdba->nr;
696 /* NOTE: pdba->nr is modified inside the loop */
697 for(i=1; (i < pdba->nr); i++) {
698 /* compare 'i' and 'i-1', throw away 'i' if they are identical
699 this is a 'while' because multiple alternate locations can be present */
700 while ( (i < pdba->nr) &&
701 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
702 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
705 ri = &pdba->resinfo[pdba->atom[i].resind];
706 printf("deleting duplicate atom %4s %s%4d%c",
707 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
708 if (ri->chainid && (ri->chainid != ' '))
709 printf(" ch %c", ri->chainid);
711 if (pdba->pdbinfo[i].atomnr)
712 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
713 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
714 printf(" altloc %c",pdba->pdbinfo[i].altloc);
719 /* We can not free, since it might be in the symtab */
720 /* sfree(pdba->atomname[i]); */
721 for (j=i; j < pdba->nr; j++) {
722 pdba->atom[j] = pdba->atom[j+1];
723 pdba->atomname[j] = pdba->atomname[j+1];
724 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
725 copy_rvec(x[j+1],x[j]);
727 srenew(pdba->atom, pdba->nr);
728 /* srenew(pdba->atomname, pdba->nr); */
729 srenew(pdba->pdbinfo, pdba->nr);
732 if (pdba->nr != oldnatoms)
733 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
738 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
741 const char *p_startrestype;
742 const char *p_restype;
743 int nstartwarn,nendwarn;
751 /* Find the starting terminus (typially N or 5') */
752 for(i=r0;i<r1 && *r_start==-1;i++)
754 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
755 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
757 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
764 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
768 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
776 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
777 for(i=*r_start;i<r1;i++)
779 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
780 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
788 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
789 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
790 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
794 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
803 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
809 modify_chain_numbers(t_atoms * pdba,
810 const char * chainsep)
813 char old_prev_chainid;
814 char old_this_chainid;
815 int old_prev_chainnum;
816 int old_this_chainnum;
822 const char * prev_atomname;
823 const char * this_atomname;
824 const char * prev_resname;
825 const char * this_resname;
830 int prev_chainnumber;
831 int this_chainnumber;
843 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
845 /* Be a bit flexible to catch typos */
846 if (!strncmp(chainsep,"id_o",4))
848 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
849 splitting = SPLIT_ID_OR_TER;
850 printf("Splitting chemical chains based on TER records or chain id changing.\n");
852 else if (!strncmp(chainsep,"int",3))
854 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
855 splitting = SPLIT_INTERACTIVE;
856 printf("Splitting chemical chains interactively.\n");
858 else if (!strncmp(chainsep,"id_a",4))
860 splitting = SPLIT_ID_AND_TER;
861 printf("Splitting chemical chains based on TER records and chain id changing.\n");
863 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
865 splitting = SPLIT_ID_ONLY;
866 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
868 else if (chainsep[0]=='t')
870 splitting = SPLIT_TER_ONLY;
871 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
875 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
878 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
880 old_prev_chainid = '?';
881 old_prev_chainnum = -1;
884 this_atomname = NULL;
889 this_chainnumber = -1;
891 for(i=0;i<pdba->nres;i++)
893 ri = &pdba->resinfo[i];
894 old_this_chainid = ri->chainid;
895 old_this_chainnum = ri->chainnum;
897 prev_atomname = this_atomname;
898 prev_atomnum = this_atomnum;
899 prev_resname = this_resname;
900 prev_resnum = this_resnum;
901 prev_chainid = this_chainid;
902 prev_chainnumber = this_chainnumber;
904 this_atomname = *(pdba->atomname[i]);
905 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
906 this_resname = *ri->name;
907 this_resnum = ri->nr;
908 this_chainid = ri->chainid;
909 this_chainnumber = ri->chainnum;
913 case SPLIT_ID_OR_TER:
914 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
920 case SPLIT_ID_AND_TER:
921 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
928 if(old_this_chainid != old_prev_chainid)
935 if(old_this_chainnum != old_prev_chainnum)
940 case SPLIT_INTERACTIVE:
941 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
945 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
946 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
947 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
948 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
950 if(NULL==fgets(select,STRLEN-1,stdin))
952 gmx_fatal(FARGS,"Error reading from stdin");
955 if(i==0 || select[0] == 'y')
962 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
965 old_prev_chainid = old_this_chainid;
966 old_prev_chainnum = old_this_chainnum;
968 ri->chainnum = new_chainnum;
997 int cmain(int argc, char *argv[])
999 const char *desc[] = {
1000 "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1001 "some database files, adds hydrogens to the molecules and generates",
1002 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1003 "and a topology in GROMACS format.",
1004 "These files can subsequently be processed to generate a run input file.",
1006 "[TT]pdb2gmx[tt] will search for force fields by looking for",
1007 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1008 "of the current working directory and of the GROMACS library directory",
1009 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1011 "By default the forcefield selection is interactive,",
1012 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1013 "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
1014 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1016 "After choosing a force field, all files will be read only from",
1017 "the corresponding force field directory.",
1018 "If you want to modify or add a residue types, you can copy the force",
1019 "field directory from the GROMACS library directory to your current",
1020 "working directory. If you want to add new protein residue types,",
1021 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1022 "or copy the whole library directory to a local directory and set",
1023 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1024 "Check Chapter 5 of the manual for more information about file formats.",
1027 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1028 "need not necessarily contain a protein structure. Every kind of",
1029 "molecule for which there is support in the database can be converted.",
1030 "If there is no support in the database, you can add it yourself.[PAR]",
1032 "The program has limited intelligence, it reads a number of database",
1033 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1034 "if necessary this can be done manually. The program can prompt the",
1035 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1036 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1037 "protonated (three protons, default), for Asp and Glu unprotonated",
1038 "(default) or protonated, for His the proton can be either on ND1,",
1039 "on NE2 or on both. By default these selections are done automatically.",
1040 "For His, this is based on an optimal hydrogen bonding",
1041 "conformation. Hydrogen bonds are defined based on a simple geometric",
1042 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1043 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1044 "[TT]-dist[tt] respectively.[PAR]",
1046 "The protonation state of N- and C-termini can be chosen interactively",
1047 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1048 "respectively. Some force fields support zwitterionic forms for chains of",
1049 "one residue, but for polypeptides these options should NOT be selected.",
1050 "The AMBER force fields have unique forms for the terminal residues,",
1051 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1052 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1053 "respectively to use these forms, making sure you preserve the format",
1054 "of the coordinate file. Alternatively, use named terminating residues",
1055 "(e.g. ACE, NME).[PAR]",
1057 "The separation of chains is not entirely trivial since the markup",
1058 "in user-generated PDB files frequently varies and sometimes it",
1059 "is desirable to merge entries across a TER record, for instance",
1060 "if you want a disulfide bridge or distance restraints between",
1061 "two protein chains or if you have a HEME group bound to a protein.",
1062 "In such cases multiple chains should be contained in a single",
1063 "[TT]moleculetype[tt] definition.",
1064 "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
1065 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1066 "start, and termini added when applicable. This can be done based on the",
1067 "existence of TER records, when the chain id changes, or combinations of either",
1068 "or both of these. You can also do the selection fully interactively.",
1069 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1070 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1071 "This can be turned off (no merging), all non-water chains can be merged into a",
1072 "single molecule, or the selection can be done interactively.[PAR]",
1074 "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
1075 "If any of the occupancies are not one, indicating that the atom is",
1076 "not resolved well in the structure, a warning message is issued.",
1077 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1078 "all occupancy fields may be zero. Either way, it is up to the user",
1079 "to verify the correctness of the input data (read the article!).[PAR]",
1081 "During processing the atoms will be reordered according to GROMACS",
1082 "conventions. With [TT]-n[tt] an index file can be generated that",
1083 "contains one group reordered in the same way. This allows you to",
1084 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1085 "one limitation: reordering is done after the hydrogens are stripped",
1086 "from the input and before new hydrogens are added. This means that",
1087 "you should not use [TT]-ignh[tt].[PAR]",
1089 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1090 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1091 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1094 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1095 "motions. Angular and out-of-plane motions can be removed by changing",
1096 "hydrogens into virtual sites and fixing angles, which fixes their",
1097 "position relative to neighboring atoms. Additionally, all atoms in the",
1098 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1099 "can be converted into virtual sites, eliminating the fast improper dihedral",
1100 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1101 "atoms are also converted to virtual sites. The mass of all atoms that are",
1102 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1103 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1104 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1105 "done for water hydrogens to slow down the rotational motion of water.",
1106 "The increase in mass of the hydrogens is subtracted from the bonded",
1107 "(heavy) atom so that the total mass of the system remains the same."
1111 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
1113 t_atoms pdba_all,*pdba;
1117 int chain,nch,maxch,nwaterchain;
1119 t_chain *chains,*cc;
1120 char select[STRLEN];
1133 gpp_atomtype_t atype;
1134 gmx_residuetype_t rt;
1136 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1137 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1138 char *c,forcefield[STRLEN],ffdir[STRLEN];
1139 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1148 rtprename_t *rtprename=NULL;
1149 int nah,nNtdb,nCtdb,ntdblist;
1150 t_hackblock *ntdb,*ctdb,**tdblist;
1154 gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
1156 t_hackblock *hb_chain;
1157 t_restp *restp_chain;
1159 const char *p_restype;
1163 const char * prev_atomname;
1164 const char * this_atomname;
1165 const char * prev_resname;
1166 const char * this_resname;
1171 int prev_chainnumber;
1172 int this_chainnumber;
1174 int this_chainstart;
1175 int prev_chainstart;
1182 { efSTX, "-f", "eiwit.pdb", ffREAD },
1183 { efSTO, "-o", "conf", ffWRITE },
1184 { efTOP, NULL, NULL, ffWRITE },
1185 { efITP, "-i", "posre", ffWRITE },
1186 { efNDX, "-n", "clean", ffOPTWR },
1187 { efSTO, "-q", "clean.pdb", ffOPTWR }
1189 #define NFILE asize(fnm)
1192 /* Command line arguments must be static */
1193 static gmx_bool bNewRTP=FALSE;
1194 static gmx_bool bInter=FALSE, bCysMan=FALSE;
1195 static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1196 static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
1197 static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1198 static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1199 static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1200 static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
1201 static real angle=135.0, distance=0.3,posre_fc=1000;
1202 static real long_bond_dist=0.25, short_bond_dist=0.05;
1203 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1204 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1205 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1206 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1207 static const char *ff = "select";
1210 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1211 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1212 { "-lb", FALSE, etREAL, {&long_bond_dist},
1213 "HIDDENLong bond warning distance" },
1214 { "-sb", FALSE, etREAL, {&short_bond_dist},
1215 "HIDDENShort bond warning distance" },
1216 { "-chainsep", FALSE, etENUM, {chainsep},
1217 "Condition in PDB files when a new chain should be started (adding termini)" },
1218 { "-merge", FALSE, etENUM, {&merge},
1219 "Merge multiple chains into a single [moleculetype]" },
1220 { "-ff", FALSE, etSTR, {&ff},
1221 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1222 { "-water", FALSE, etENUM, {watstr},
1223 "Water model to use" },
1224 { "-inter", FALSE, etBOOL, {&bInter},
1225 "Set the next 8 options to interactive"},
1226 { "-ss", FALSE, etBOOL, {&bCysMan},
1227 "Interactive SS bridge selection" },
1228 { "-ter", FALSE, etBOOL, {&bTerMan},
1229 "Interactive termini selection, instead of charged (default)" },
1230 { "-lys", FALSE, etBOOL, {&bLysMan},
1231 "Interactive lysine selection, instead of charged" },
1232 { "-arg", FALSE, etBOOL, {&bArgMan},
1233 "Interactive arginine selection, instead of charged" },
1234 { "-asp", FALSE, etBOOL, {&bAspMan},
1235 "Interactive aspartic acid selection, instead of charged" },
1236 { "-glu", FALSE, etBOOL, {&bGluMan},
1237 "Interactive glutamic acid selection, instead of charged" },
1238 { "-gln", FALSE, etBOOL, {&bGlnMan},
1239 "Interactive glutamine selection, instead of neutral" },
1240 { "-his", FALSE, etBOOL, {&bHisMan},
1241 "Interactive histidine selection, instead of checking H-bonds" },
1242 { "-angle", FALSE, etREAL, {&angle},
1243 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1244 { "-dist", FALSE, etREAL, {&distance},
1245 "Maximum donor-acceptor distance for a H-bond (nm)" },
1246 { "-una", FALSE, etBOOL, {&bUnA},
1247 "Select aromatic rings with united CH atoms on phenylalanine, "
1248 "tryptophane and tyrosine" },
1249 { "-sort", FALSE, etBOOL, {&bSort},
1250 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1251 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1252 "Ignore hydrogen atoms that are in the coordinate file" },
1253 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1254 "Continue when atoms are missing, dangerous" },
1255 { "-v", FALSE, etBOOL, {&bVerbose},
1256 "Be slightly more verbose in messages" },
1257 { "-posrefc",FALSE, etREAL, {&posre_fc},
1258 "Force constant for position restraints" },
1259 { "-vsite", FALSE, etENUM, {vsitestr},
1260 "Convert atoms to virtual sites" },
1261 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1262 "Make hydrogen atoms heavy" },
1263 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1264 "Change the mass of hydrogens to 2 amu" },
1265 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1266 "Use charge groups in the [TT].rtp[tt] file" },
1267 { "-cmap", TRUE, etBOOL, {&bCmap},
1268 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1269 { "-renum", TRUE, etBOOL, {&bRenumRes},
1270 "Renumber the residues consecutively in the output" },
1271 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1272 "Use [TT].rtp[tt] entry names as residue names" }
1274 #define NPARGS asize(pa)
1276 CopyRight(stderr,argv[0]);
1277 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1280 /* Force field selection, interactive or direct */
1281 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1282 forcefield,sizeof(forcefield),
1283 ffdir,sizeof(ffdir));
1285 if (strlen(forcefield) > 0) {
1286 strcpy(ffname,forcefield);
1287 ffname[0] = toupper(ffname[0]);
1289 gmx_fatal(FARGS,"Empty forcefield string");
1292 printf("\nUsing the %s force field in directory %s\n\n",
1295 choose_watermodel(watstr[0],ffdir,&watermodel);
1298 /* if anything changes here, also change description of -inter */
1311 else if (bDeuterate)
1316 switch(vsitestr[0][0]) {
1317 case 'n': /* none */
1319 bVsiteAromatics=FALSE;
1321 case 'h': /* hydrogens */
1323 bVsiteAromatics=FALSE;
1325 case 'a': /* aromatics */
1327 bVsiteAromatics=TRUE;
1330 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1331 __FILE__,__LINE__,vsitestr[0]);
1334 /* Open the symbol table */
1335 open_symtab(&symtab);
1337 /* Residue type database */
1338 gmx_residuetype_init(&rt);
1340 /* Read residue renaming database(s), if present */
1341 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1345 for(i=0; i<nrrn; i++) {
1346 fp = fflib_open(rrn[i]);
1347 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1353 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1355 for(i=0;i<nrtprename;i++)
1357 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1359 /* Only add names if the 'standard' gromacs/iupac base name was found */
1362 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1363 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1364 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1365 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1370 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1371 strstr(watermodel,"4P"))) {
1373 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1374 strstr(watermodel,"5P"))) {
1380 aps = gmx_atomprop_init();
1381 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1382 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1386 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1388 printf("Analyzing pdb file\n");
1393 modify_chain_numbers(&pdba_all,chainsep[0]);
1397 this_atomname = NULL;
1399 this_resname = NULL;
1402 this_chainnumber = -1;
1403 this_chainstart = 0;
1404 /* Keep the compiler happy */
1405 prev_chainstart = 0;
1410 for (i=0; (i<natom); i++)
1412 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1414 prev_atomname = this_atomname;
1415 prev_atomnum = this_atomnum;
1416 prev_resname = this_resname;
1417 prev_resnum = this_resnum;
1418 prev_chainid = this_chainid;
1419 prev_chainnumber = this_chainnumber;
1422 prev_chainstart = this_chainstart;
1425 this_atomname = *pdba_all.atomname[i];
1426 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1427 this_resname = *ri->name;
1428 this_resnum = ri->nr;
1429 this_chainid = ri->chainid;
1430 this_chainnumber = ri->chainnum;
1432 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1433 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1435 this_chainstart = pdba_all.atom[i].resind;
1440 if(!strncmp(merge[0],"int",3))
1442 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1443 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1444 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1445 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1447 if(NULL==fgets(select,STRLEN-1,stdin))
1449 gmx_fatal(FARGS,"Error reading from stdin");
1451 bMerged = (select[0] == 'y');
1453 else if(!strncmp(merge[0],"all",3))
1461 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1462 pdba_all.atom[i].resind - prev_chainstart;
1463 pdb_ch[nch-1].nterpairs++;
1464 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1469 /* set natom for previous chain */
1472 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1479 /* check if chain identifier was used before */
1480 for (j=0; (j<nch); j++)
1482 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1484 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1485 "They will be treated as separate chains unless you reorder your file.\n",
1492 srenew(pdb_ch,maxch);
1494 pdb_ch[nch].chainid = ri->chainid;
1495 pdb_ch[nch].chainnum = ri->chainnum;
1496 pdb_ch[nch].start=i;
1497 pdb_ch[nch].bAllWat=bWat;
1499 pdb_ch[nch].nterpairs=0;
1501 pdb_ch[nch].nterpairs=1;
1502 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1503 /* modified [nch] to [0] below */
1504 pdb_ch[nch].chainstart[0]=0;
1510 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1512 /* set all the water blocks at the end of the chain */
1513 snew(swap_index,nch);
1515 for(i=0; i<nch; i++)
1516 if (!pdb_ch[i].bAllWat) {
1520 for(i=0; i<nch; i++)
1521 if (pdb_ch[i].bAllWat) {
1526 printf("Moved all the water blocks to the end\n");
1529 /* copy pdb data and x for all chains */
1530 for (i=0; (i<nch); i++) {
1532 chains[i].chainid = pdb_ch[si].chainid;
1533 chains[i].chainnum = pdb_ch[si].chainnum;
1534 chains[i].bAllWat = pdb_ch[si].bAllWat;
1535 chains[i].nterpairs = pdb_ch[si].nterpairs;
1536 chains[i].chainstart = pdb_ch[si].chainstart;
1537 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1538 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1539 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1540 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1542 snew(chains[i].pdba,1);
1543 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1544 snew(chains[i].x,chains[i].pdba->nr);
1545 for (j=0; j<chains[i].pdba->nr; j++) {
1546 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1547 snew(chains[i].pdba->atomname[j],1);
1548 *chains[i].pdba->atomname[j] =
1549 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1550 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1551 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1553 /* Re-index the residues assuming that the indices are continuous */
1554 k = chains[i].pdba->atom[0].resind;
1555 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1556 chains[i].pdba->nres = nres;
1557 for(j=0; j < chains[i].pdba->nr; j++) {
1558 chains[i].pdba->atom[j].resind -= k;
1560 srenew(chains[i].pdba->resinfo,nres);
1561 for(j=0; j<nres; j++) {
1562 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1563 snew(chains[i].pdba->resinfo[j].name,1);
1564 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1565 /* make all chain identifiers equal to that of the chain */
1566 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1571 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1574 printf("There are %d chains and %d blocks of water and "
1575 "%d residues with %d atoms\n",
1576 nch-nwaterchain,nwaterchain,
1577 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1579 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1580 for (i=0; (i<nch); i++)
1581 printf(" %d '%c' %5d %6d %s\n",
1582 i+1, chains[i].chainid ? chains[i].chainid:'-',
1583 chains[i].pdba->nres, chains[i].pdba->nr,
1584 chains[i].bAllWat ? "(only water)":"");
1587 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1589 /* Read atomtypes... */
1590 atype = read_atype(ffdir,&symtab);
1592 /* read residue database */
1593 printf("Reading residue database... (%s)\n",forcefield);
1594 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1597 for(i=0; i<nrtpf; i++) {
1598 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1603 /* Not correct with multiple rtp input files with different bonded types */
1604 fp=gmx_fio_fopen("new.rtp","w");
1605 print_resall(fp,nrtp,restp,atype);
1609 /* read hydrogen database */
1610 nah = read_h_db(ffdir,&ah);
1612 /* Read Termini database... */
1613 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1614 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1616 top_fn=ftp2fn(efTOP,NFILE,fnm);
1617 top_file=gmx_fio_fopen(top_fn,"w");
1619 sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
1621 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1628 for(chain=0; (chain<nch); chain++) {
1629 cc = &(chains[chain]);
1631 /* set pdba, natom and nres to the current chain */
1635 nres =cc->pdba->nres;
1637 if (cc->chainid && ( cc->chainid != ' ' ) )
1638 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1639 chain+1,cc->chainid,natom,nres);
1641 printf("Processing chain %d (%d atoms, %d residues)\n",
1642 chain+1,natom,nres);
1644 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1645 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1646 nrtprename,rtprename);
1648 cc->chainstart[cc->nterpairs] = pdba->nres;
1650 for(i=0; i<cc->nterpairs; i++)
1652 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1653 &(cc->r_start[j]),&(cc->r_end[j]),rt);
1655 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1661 if (cc->nterpairs == 0)
1663 printf("Problem with chain definition, or missing terminal residues.\n"
1664 "This chain does not appear to contain a recognized chain molecule.\n"
1665 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1668 /* Check for disulfides and other special bonds */
1669 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1671 if (nrtprename > 0) {
1672 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1678 sprintf(fn,"chain.pdb");
1680 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1682 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1686 for(i=0; i<cc->nterpairs; i++)
1690 * We first apply a filter so we only have the
1691 * termini that can be applied to the residue in question
1692 * (or a generic terminus if no-residue specific is available).
1694 /* First the N terminus */
1697 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1698 *pdba->resinfo[cc->r_start[i]].name,
1699 *pdba->resinfo[cc->r_start[i]].rtp,
1703 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1704 "is already in a terminus-specific form and skipping terminus selection.\n");
1709 if(bTerMan && ntdblist>1)
1711 sprintf(select,"Select start terminus type for %s-%d",
1712 *pdba->resinfo[cc->r_start[i]].name,
1713 pdba->resinfo[cc->r_start[i]].nr);
1714 cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
1718 cc->ntdb[i] = tdblist[0];
1721 printf("Start terminus %s-%d: %s\n",
1722 *pdba->resinfo[cc->r_start[i]].name,
1723 pdba->resinfo[cc->r_start[i]].nr,
1724 (cc->ntdb[i])->name);
1733 /* And the C terminus */
1736 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1737 *pdba->resinfo[cc->r_end[i]].name,
1738 *pdba->resinfo[cc->r_end[i]].rtp,
1742 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1743 "is already in a terminus-specific form and skipping terminus selection.\n");
1748 if(bTerMan && ntdblist>1)
1750 sprintf(select,"Select end terminus type for %s-%d",
1751 *pdba->resinfo[cc->r_end[i]].name,
1752 pdba->resinfo[cc->r_end[i]].nr);
1753 cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
1757 cc->ctdb[i] = tdblist[0];
1759 printf("End terminus %s-%d: %s\n",
1760 *pdba->resinfo[cc->r_end[i]].name,
1761 pdba->resinfo[cc->r_end[i]].nr,
1762 (cc->ctdb[i])->name);
1771 /* lookup hackblocks and rtp for all residues */
1772 get_hackblocks_rtp(&hb_chain, &restp_chain,
1773 nrtp, restp, pdba->nres, pdba->resinfo,
1774 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1775 /* ideally, now we would not need the rtp itself anymore, but do
1776 everything using the hb and restp arrays. Unfortunately, that
1777 requires some re-thinking of code in gen_vsite.c, which I won't
1778 do now :( AF 26-7-99 */
1780 rename_atoms(NULL,ffdir,
1781 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1783 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1786 block = new_blocka();
1788 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1789 natom,&pdba,&x,block,&gnames);
1790 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1791 if (ftp2bSet(efNDX,NFILE,fnm)) {
1793 fprintf(stderr,"WARNING: with the -remh option the generated "
1794 "index file (%s) might be useless\n"
1795 "(the index file is generated before hydrogens are added)",
1796 ftp2fn(efNDX,NFILE,fnm));
1798 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1800 for(i=0; i < block->nr; i++)
1805 fprintf(stderr,"WARNING: "
1806 "without sorting no check for duplicate atoms can be done\n");
1809 /* Generate Hydrogen atoms (and termini) in the sequence */
1810 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1811 natom=add_h(&pdba,&x,nah,ah,
1812 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1813 NULL,NULL,TRUE,FALSE);
1814 printf("Now there are %d residues with %d atoms\n",
1815 pdba->nres,pdba->nr);
1816 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1819 for(i=0; (i<natom); i++)
1820 fprintf(debug,"Res %s%d atom %d %s\n",
1821 *(pdba->resinfo[pdba->atom[i].resind].name),
1822 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1824 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1826 /* make up molecule name(s) */
1828 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1830 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1836 sprintf(molname,"Water");
1840 this_chainid = cc->chainid;
1842 /* Add the chain id if we have one */
1843 if(this_chainid != ' ')
1845 sprintf(buf,"_chain_%c",this_chainid);
1849 /* Check if there have been previous chains with the same id */
1851 for(k=0;k<chain;k++)
1853 if(cc->chainid == chains[k].chainid)
1858 /* Add the number for this chain identifier if there are multiple copies */
1862 sprintf(buf,"%d",nid_used+1);
1866 if(strlen(suffix)>0)
1868 sprintf(molname,"%s%s",p_restype,suffix);
1872 strcpy(molname,p_restype);
1876 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1878 strcpy(itp_fn,top_fn);
1879 printf("Chain time...\n");
1880 c=strrchr(itp_fn,'.');
1881 sprintf(c,"_%s.itp",molname);
1882 c=strrchr(posre_fn,'.');
1883 sprintf(c,"_%s.itp",molname);
1884 if (strcmp(itp_fn,posre_fn) == 0) {
1885 strcpy(buf_fn,posre_fn);
1886 c = strrchr(buf_fn,'.');
1888 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1892 srenew(incls,nincl);
1893 incls[nincl-1]=strdup(itp_fn);
1894 itp_file=gmx_fio_fopen(itp_fn,"w");
1898 srenew(mols,nmol+1);
1900 mols[nmol].name = strdup("SOL");
1901 mols[nmol].nr = pdba->nres;
1903 mols[nmol].name = strdup(molname);
1909 print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
1919 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1921 restp_chain,hb_chain,
1922 cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
1923 bVsites,bVsiteAromatics,forcefield,ffdir,
1924 mHmult,nssbonds,ssbonds,
1925 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1926 bRenumRes,bRTPresname);
1929 write_posres(posre_fn,pdba,posre_fc);
1932 gmx_fio_fclose(itp_file);
1934 /* pdba and natom have been reassigned somewhere so: */
1939 if (cc->chainid == ' ')
1940 sprintf(fn,"chain.pdb");
1942 sprintf(fn,"chain_%c.pdb",cc->chainid);
1943 cool_quote(quote,255,NULL);
1944 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1948 if (watermodel == NULL) {
1949 for(chain=0; chain<nch; chain++) {
1950 if (chains[chain].bAllWat) {
1951 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1955 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1956 if (!fflib_fexist(buf_fn)) {
1957 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1962 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1963 gmx_fio_fclose(top_file);
1965 gmx_residuetype_destroy(rt);
1967 /* now merge all chains back together */
1970 for (i=0; (i<nch); i++) {
1971 natom+=chains[i].pdba->nr;
1972 nres+=chains[i].pdba->nres;
1975 init_t_atoms(atoms,natom,FALSE);
1976 for(i=0; i < atoms->nres; i++)
1977 sfree(atoms->resinfo[i].name);
1978 sfree(atoms->resinfo);
1980 snew(atoms->resinfo,nres);
1984 for (i=0; (i<nch); i++) {
1986 printf("Including chain %d in system: %d atoms %d residues\n",
1987 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1988 for (j=0; (j<chains[i].pdba->nr); j++) {
1989 atoms->atom[k]=chains[i].pdba->atom[j];
1990 atoms->atom[k].resind += l; /* l is processed nr of residues */
1991 atoms->atomname[k]=chains[i].pdba->atomname[j];
1992 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1993 copy_rvec(chains[i].x[j],x[k]);
1996 for (j=0; (j<chains[i].pdba->nres); j++) {
1997 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1999 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2006 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
2007 print_sums(atoms, TRUE);
2010 fprintf(stderr,"\nWriting coordinate file...\n");
2011 clear_rvec(box_space);
2013 gen_box(0,atoms->nr,x,box,box_space,FALSE);
2014 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
2016 printf("\t\t--------- PLEASE NOTE ------------\n");
2017 printf("You have successfully generated a topology from: %s.\n",
2018 opt2fn("-f",NFILE,fnm));
2019 if (watermodel != NULL) {
2020 printf("The %s force field and the %s water model are used.\n",
2023 printf("The %s force field is used.\n",
2026 printf("\t\t--------- ETON ESAELP ------------\n");