1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
84 static const char *res2bb_notermini(const char *name,
85 int nrr,const rtprename_t *rr)
87 /* NOTE: This function returns the main building block name,
88 * it does not take terminal renaming into account.
93 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
97 return (i < nrr ? rr[i].main : name);
100 static const char *select_res(int nr,int resnr,
101 const char *name[],const char *expl[],
103 int nrr,const rtprename_t *rr)
107 printf("Which %s type do you want for residue %d\n",title,resnr+1);
108 for(sel=0; (sel < nr); sel++) {
109 printf("%d. %s (%s)\n",
110 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
112 printf("\nType a number:"); fflush(stdout);
114 if (scanf("%d",&sel) != 1)
115 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
120 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
122 enum { easp, easpH, easpNR };
123 const char *lh[easpNR] = { "ASP", "ASPH" };
124 const char *expl[easpNR] = {
125 "Not protonated (charge -1)",
126 "Protonated (charge 0)"
129 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
132 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
134 enum { eglu, egluH, egluNR };
135 const char *lh[egluNR] = { "GLU", "GLUH" };
136 const char *expl[egluNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
144 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
146 enum { egln, eglnH, eglnNR };
147 const char *lh[eglnNR] = { "GLN", "QLN" };
148 const char *expl[eglnNR] = {
149 "Not protonated (charge 0)",
150 "Protonated (charge +1)"
153 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
156 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
158 enum { elys, elysH, elysNR };
159 const char *lh[elysNR] = { "LYSN", "LYS" };
160 const char *expl[elysNR] = {
161 "Not protonated (charge 0)",
162 "Protonated (charge +1)"
165 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
168 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
170 enum { earg, eargH, eargNR };
171 const char *lh[eargNR] = { "ARGN", "ARG" };
172 const char *expl[eargNR] = {
173 "Not protonated (charge 0)",
174 "Protonated (charge +1)"
177 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
180 static const char *get_cystp(int resnr,int nrr,const rtprename_t *rr)
182 enum { ecys, ecysH, ecysNR };
183 const char *lh[ecysNR] = { "CYS2", "CYS" };
184 const char *expl[ecysNR] = {
185 "Cysteine in disulfide bridge",
189 return select_res(ecysNR,resnr,lh,expl,"CYSTEINE",nrr,rr);
193 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
195 const char *expl[ehisNR] = {
202 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
205 static void read_rtprename(const char *fname,FILE *fp,
206 int *nrtprename,rtprename_t **rtprename)
208 char line[STRLEN],buf[STRLEN];
217 while(get_a_line(fp,line,STRLEN)) {
219 nc = sscanf(line,"%s %s %s %s %s %s",
220 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
222 if (nc != 2 && nc != 5) {
223 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
226 } else if (nc != ncol) {
227 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
231 /* This file does not have special termini names, copy them from main */
232 strcpy(rr[n].nter,rr[n].main);
233 strcpy(rr[n].cter,rr[n].main);
234 strcpy(rr[n].bter,rr[n].main);
244 static char *search_resrename(int nrr,rtprename_t *rr,
246 bool bStart,bool bEnd,
247 bool bCompareFFRTPname)
255 while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx) != 0) ||
256 ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
261 /* If found in the database, rename this residue's rtp building block,
262 * otherwise keep the old name.
284 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
292 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
293 int nrr,rtprename_t *rr,t_symtab *symtab,
301 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
303 for(r=0; r<pdba->nres; r++)
307 for(j=0; j<nterpairs; j++)
314 for(j=0; j<nterpairs; j++)
322 nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
324 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
326 /* This is a terminal residue, but the residue name,
327 * currently stored in .rtp, is not a standard residue name,
328 * but probably a force field specific rtp name.
329 * Check if we need to rename it because it is terminal.
331 nn = search_resrename(nrr,rr,
332 *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
335 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
339 printf("Changing rtp entry of residue %d %s to '%s'\n",
340 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
342 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
347 static void pdbres_to_gmxrtp(t_atoms *pdba)
351 for(i=0; (i<pdba->nres); i++)
353 if (pdba->resinfo[i].rtp == NULL)
355 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
360 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
361 bool bFullCompare,t_symtab *symtab)
366 for(i=0; (i<pdba->nres); i++)
368 resnm = *pdba->resinfo[i].name;
369 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
370 (!bFullCompare && strstr(resnm,oldnm) != NULL))
372 /* Rename the residue name (not the rtp name) */
373 pdba->resinfo[i].name = put_symtab(symtab,newnm);
378 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
379 bool bFullCompare,t_symtab *symtab)
384 for(i=0; (i<pdba->nres); i++)
386 /* We have not set the rtp name yes, use the residue name */
387 bbnm = *pdba->resinfo[i].name;
388 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
389 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
391 /* Change the rtp builing block name */
392 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
397 static void rename_bbint(t_atoms *pdba,const char *oldnm,
398 const char *gettp(int,int,const rtprename_t *),
401 int nrr,const rtprename_t *rr)
407 for(i=0; i<pdba->nres; i++)
409 /* We have not set the rtp name yes, use the residue name */
410 bbnm = *pdba->resinfo[i].name;
411 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
412 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
414 ptr = gettp(i,nrr,rr);
415 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
420 static void check_occupancy(t_atoms *atoms,const char *filename,bool bVerbose)
426 ftp = fn2ftp(filename);
427 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
428 fprintf(stderr,"No occupancies in %s\n",filename);
430 for(i=0; (i<atoms->nr); i++) {
431 if (atoms->pdbinfo[i].occup != 1) {
433 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
434 *atoms->resinfo[atoms->atom[i].resind].name,
435 atoms->resinfo[ atoms->atom[i].resind].nr,
437 atoms->pdbinfo[i].occup);
438 if (atoms->pdbinfo[i].occup == 0)
444 if (nzero == atoms->nr) {
445 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
446 } else if ((nzero > 0) || (nnotone > 0)) {
449 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
450 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
452 nzero,nnotone,atoms->nr);
454 fprintf(stderr,"All occupancies are one\n");
459 void write_posres(char *fn,t_atoms *pdba,real fc)
464 fp=gmx_fio_fopen(fn,"w");
466 "; In this topology include file, you will find position restraint\n"
467 "; entries for all the heavy atoms in your original pdb file.\n"
468 "; This means that all the protons which were added by pdb2gmx are\n"
469 "; not restrained.\n"
471 "[ position_restraints ]\n"
472 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
474 for(i=0; (i<pdba->nr); i++) {
475 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
476 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
481 static int read_pdball(const char *inf, const char *outf,char *title,
482 t_atoms *atoms, rvec **x,
483 int *ePBC,matrix box, bool bRemoveH,
484 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
485 gmx_atomprop_t aps,bool bVerbose)
486 /* Read a pdb file. (containing proteins) */
488 int natom,new_natom,i;
491 printf("Reading %s...\n",inf);
492 get_stx_coordnum(inf,&natom);
493 init_t_atoms(atoms,natom,TRUE);
495 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
496 if (fn2ftp(inf) == efPDB)
497 get_pdb_atomnumber(atoms,aps);
500 for(i=0; i<atoms->nr; i++)
501 if (!is_hydrogen(*atoms->atomname[i])) {
502 atoms->atom[new_natom]=atoms->atom[i];
503 atoms->atomname[new_natom]=atoms->atomname[i];
504 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
505 copy_rvec((*x)[i],(*x)[new_natom]);
513 if (title && title[0])
514 printf(" '%s',",title);
515 printf(" %d atoms\n",natom);
517 /* Rename residues */
518 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
519 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
520 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
522 rename_atoms("xlateat.dat",NULL,
523 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
529 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
534 void process_chain(t_atoms *pdba, rvec *x,
535 bool bTrpU,bool bPheU,bool bTyrU,
536 bool bLysMan,bool bAspMan,bool bGluMan,
537 bool bHisMan,bool bArgMan,bool bGlnMan,
538 real angle,real distance,t_symtab *symtab,
539 int nrr,const rtprename_t *rr)
541 /* Rename aromatics, lys, asp and histidine */
542 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
543 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
544 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
546 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
548 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
550 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
552 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
554 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
556 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
558 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
561 set_histp(pdba,x,angle,distance);
563 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
565 /* Initialize the rtp builing block names with the residue names
566 * for the residues that have not been processed above.
568 pdbres_to_gmxrtp(pdba);
570 /* Now we have all rtp names set.
571 * The rtp names will conform to Gromacs naming,
572 * unless the input pdb file contained one or more force field specific
573 * rtp names as residue names.
577 /* struct for sorting the atoms from the pdb file */
579 int resnr; /* residue number */
580 int j; /* database order index */
581 int index; /* original atom number */
582 char anm1; /* second letter of atom name */
583 char altloc; /* alternate location indicator */
586 int pdbicomp(const void *a,const void *b)
594 d = (pa->resnr - pb->resnr);
598 d = (pa->anm1 - pb->anm1);
600 d = (pa->altloc - pb->altloc);
607 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
608 int natoms,t_atoms **pdbaptr,rvec **x,
609 t_blocka *block,char ***gnames)
611 t_atoms *pdba,*pdbnew;
626 for(i=0; i<natoms; i++) {
627 atomnm = *pdba->atomname[i];
628 rptr = &restp[pdba->atom[i].resind];
629 for(j=0; (j<rptr->natom); j++) {
630 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0) {
634 if (j==rptr->natom) {
635 if ( ( ( pdba->atom[i].resind == 0) && (atomnm[0] == 'H') &&
636 ( (atomnm[1] == '1') || (atomnm[1] == '2') ||
637 (atomnm[1] == '3') ) ) )
642 sprintf(buf,"Atom %s in residue %s %d not found in rtp entry %s with %d atoms\n"
643 "while sorting atoms%s",atomnm,
644 *pdba->resinfo[pdba->atom[i].resind].name,
645 pdba->resinfo[pdba->atom[i].resind].nr,
648 is_hydrogen(atomnm) ? ". Maybe different protonation state.\n"
649 " Remove this hydrogen or choose a different "
650 "protonation state.\n"
651 " Option -ignh will ignore all hydrogens "
652 "in the input." : "");
653 gmx_fatal(FARGS,buf);
656 /* make shadow array to be sorted into indexgroup */
657 pdbi[i].resnr = pdba->atom[i].resind;
660 pdbi[i].anm1 = atomnm[1];
661 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
663 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
665 /* pdba is sorted in pdbnew using the pdbi index */
668 init_t_atoms(pdbnew,natoms,TRUE);
671 pdbnew->nres=pdba->nres;
672 sfree(pdbnew->resinfo);
673 pdbnew->resinfo=pdba->resinfo;
674 for (i=0; i<natoms; i++) {
675 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
676 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
677 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
678 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
679 /* make indexgroup in block */
683 sfree(pdba->atomname);
685 sfree(pdba->pdbinfo);
688 /* copy the sorted pdbnew back to pdba */
691 add_grp(block, gnames, natoms, a, "prot_sort");
697 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],bool bVerbose)
699 int i,j,oldnatoms,ndel;
702 printf("Checking for duplicate atoms....\n");
703 oldnatoms = pdba->nr;
705 /* NOTE: pdba->nr is modified inside the loop */
706 for(i=1; (i < pdba->nr); i++) {
707 /* compare 'i' and 'i-1', throw away 'i' if they are identical
708 this is a 'while' because multiple alternate locations can be present */
709 while ( (i < pdba->nr) &&
710 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
711 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
714 ri = &pdba->resinfo[pdba->atom[i].resind];
715 printf("deleting duplicate atom %4s %s%4d%c",
716 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
717 if (ri->chainid && (ri->chainid != ' '))
718 printf(" ch %c", ri->chainid);
720 if (pdba->pdbinfo[i].atomnr)
721 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
722 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
723 printf(" altloc %c",pdba->pdbinfo[i].altloc);
728 /* We can not free, since it might be in the symtab */
729 /* sfree(pdba->atomname[i]); */
730 for (j=i; j < pdba->nr; j++) {
731 pdba->atom[j] = pdba->atom[j+1];
732 pdba->atomname[j] = pdba->atomname[j+1];
733 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
734 copy_rvec(x[j+1],x[j]);
736 srenew(pdba->atom, pdba->nr);
737 /* srenew(pdba->atomname, pdba->nr); */
738 srenew(pdba->pdbinfo, pdba->nr);
741 if (pdba->nr != oldnatoms)
742 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
747 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
750 const char *p_startrestype;
751 const char *p_restype;
752 int nstartwarn,nendwarn;
760 /* Find the starting terminus (typially N or 5') */
761 for(i=r0;i<r1 && *r_start==-1;i++)
763 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
764 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
766 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
773 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
777 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
785 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
786 for(i=*r_start;i<r1;i++)
788 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
789 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
797 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
798 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
799 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
803 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
812 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
818 modify_chain_numbers(t_atoms * pdba,
819 const char * chainsep)
822 char old_prev_chainid;
823 char old_this_chainid;
824 int old_prev_chainnum;
825 int old_this_chainnum;
838 printf("Splitting PDB chains based on ");
839 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
841 /* Be a bit flexible to catch typos */
842 if (!strncmp(chainsep,"id_o",4) || !strncmp(chainsep,"int",3))
844 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
845 splitting = SPLIT_ID_OR_TER;
846 printf("TER records or changing chain id.\n");
848 else if (!strncmp(chainsep,"id_a",4))
850 splitting = SPLIT_ID_AND_TER;
851 printf("TER records and chain id.\n");
853 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
855 splitting = SPLIT_ID_ONLY;
856 printf("changing chain id only (ignoring TER records).\n");
858 else if (chainsep[0]=='t')
860 splitting = SPLIT_TER_ONLY;
861 printf("TER records only (ignoring chain id).\n");
865 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
868 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
870 old_prev_chainid = '?';
871 old_prev_chainnum = -1;
874 for(i=0;i<pdba->nres;i++)
876 ri = &pdba->resinfo[i];
877 old_this_chainid = ri->chainid;
878 old_this_chainnum = ri->chainnum;
882 case SPLIT_ID_OR_TER:
883 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
889 case SPLIT_ID_AND_TER:
890 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
897 if(old_this_chainid != old_prev_chainid)
904 if(old_this_chainnum != old_prev_chainnum)
910 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
913 old_prev_chainid = old_this_chainid;
914 old_prev_chainnum = old_this_chainnum;
916 ri->chainnum = new_chainnum;
945 int main(int argc, char *argv[])
947 const char *desc[] = {
948 "This program reads a pdb (or gro) file, reads",
949 "some database files, adds hydrogens to the molecules and generates",
950 "coordinates in Gromacs (Gromos), or optionally pdb, format",
951 "and a topology in Gromacs format.",
952 "These files can subsequently be processed to generate a run input file.",
954 "pdb2gmx will search for force fields by looking for",
955 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
956 "of the current working directory and of the Gomracs library directory",
957 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
959 "By default the forcefield selection is interactive,",
960 "but you can use the [TT]-ff[tt] option to specify one of the short names",
961 "in the list on the command line instead. In that case pdb2gmx just looks",
962 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
964 "After choosing a force field, all files will be read only from",
965 "the corresponding force field directory.",
966 "If you want to modify or add a residue types, you can copy the force",
967 "field directory from the Gromacs library directory to your current",
968 "working directory. If you want to add new protein residue types,",
969 "you will need to modify residuetypes.dat in the libary directory",
970 "or copy the whole library directory to a local directory and set",
971 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
972 "Check chapter 5 of the manual for more information about file formats.",
975 "Note that a pdb file is nothing more than a file format, and it",
976 "need not necessarily contain a protein structure. Every kind of",
977 "molecule for which there is support in the database can be converted.",
978 "If there is no support in the database, you can add it yourself.[PAR]",
980 "The program has limited intelligence, it reads a number of database",
981 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
982 "if necessary this can be done manually. The program can prompt the",
983 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue she",
984 "wants. For LYS the choice is between neutral (two protons on NZ) or",
985 "protonated (three protons, default), for ASP and GLU unprotonated",
986 "(default) or protonated, for HIS the proton can be either on ND1,",
987 "on NE2 or on both. By default these selections are done automatically.",
988 "For His, this is based on an optimal hydrogen bonding",
989 "conformation. Hydrogen bonds are defined based on a simple geometric",
990 "criterium, specified by the maximum hydrogen-donor-acceptor angle",
991 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
992 "[TT]-dist[tt] respectively.[PAR]",
994 "The separation of chains is not entirely trivial since the markup",
995 "in user-generated PDB files frequently varies and sometimes it",
996 "is desirable to merge entries across a TER record, for instance",
997 "if you want a disulfide bridge or distance restraints between",
998 "two protein chains or if you have a HEME group bound to a protein.",
999 "In such cases multiple chains should be contained in a single",
1000 "[TT]molecule_type[tt] definition.",
1001 "To handle this, pdb2gmx has an option [TT]-chainsep[tt] so you can",
1002 "choose whether a new chain should start when we find a TER record,",
1003 "when the chain id changes, combinations of either or both of these",
1004 "or fully interactively.[PAR]",
1006 "pdb2gmx will also check the occupancy field of the pdb file.",
1007 "If any of the occupancies are not one, indicating that the atom is",
1008 "not resolved well in the structure, a warning message is issued.",
1009 "When a pdb file does not originate from an X-Ray structure determination",
1010 "all occupancy fields may be zero. Either way, it is up to the user",
1011 "to verify the correctness of the input data (read the article!).[PAR]",
1013 "During processing the atoms will be reordered according to Gromacs",
1014 "conventions. With [TT]-n[tt] an index file can be generated that",
1015 "contains one group reordered in the same way. This allows you to",
1016 "convert a Gromos trajectory and coordinate file to Gromos. There is",
1017 "one limitation: reordering is done after the hydrogens are stripped",
1018 "from the input and before new hydrogens are added. This means that",
1019 "you should not use [TT]-ignh[tt].[PAR]",
1021 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1022 "identifiers. Therefore it is useful to enter a pdb file name at",
1023 "the [TT]-o[tt] option when you want to convert a multichain pdb file.",
1026 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1027 "motions. Angular and out-of-plane motions can be removed by changing",
1028 "hydrogens into virtual sites and fixing angles, which fixes their",
1029 "position relative to neighboring atoms. Additionally, all atoms in the",
1030 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1031 "can be converted into virtual sites, elminating the fast improper dihedral",
1032 "fluctuations in these rings. Note that in this case all other hydrogen",
1033 "atoms are also converted to virtual sites. The mass of all atoms that are",
1034 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1035 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1036 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1037 "done for water hydrogens to slow down the rotational motion of water.",
1038 "The increase in mass of the hydrogens is subtracted from the bonded",
1039 "(heavy) atom so that the total mass of the system remains the same."
1043 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
1045 t_atoms pdba_all,*pdba;
1049 int chain,nch,maxch,nwaterchain;
1051 t_chain *chains,*cc;
1052 char select[STRLEN];
1065 gpp_atomtype_t atype;
1066 gmx_residuetype_t rt;
1068 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1069 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1070 char *c,forcefield[STRLEN],ffdir[STRLEN];
1071 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1080 rtprename_t *rtprename=NULL;
1081 int nah,nNtdb,nCtdb,ntdblist;
1082 t_hackblock *ntdb,*ctdb,**tdblist;
1086 bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bMerge;
1088 t_hackblock *hb_chain;
1089 t_restp *restp_chain;
1091 const char *p_restype;
1095 const char * prev_atomname;
1096 const char * this_atomname;
1097 const char * prev_resname;
1098 const char * this_resname;
1103 int prev_chainnumber;
1104 int this_chainnumber;
1106 int this_chainstart;
1107 int prev_chainstart;
1112 { efSTX, "-f", "eiwit.pdb", ffREAD },
1113 { efSTO, "-o", "conf", ffWRITE },
1114 { efTOP, NULL, NULL, ffWRITE },
1115 { efITP, "-i", "posre", ffWRITE },
1116 { efNDX, "-n", "clean", ffOPTWR },
1117 { efSTO, "-q", "clean.pdb", ffOPTWR }
1119 #define NFILE asize(fnm)
1122 /* Command line arguments must be static */
1123 static bool bNewRTP=FALSE;
1124 static bool bInter=FALSE, bCysMan=FALSE;
1125 static bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1126 static bool bGlnMan=FALSE, bArgMan=FALSE;
1127 static bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1128 static bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1129 static bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1130 static bool bRenumRes=FALSE,bRTPresname=FALSE;
1131 static real angle=135.0, distance=0.3,posre_fc=1000;
1132 static real long_bond_dist=0.25, short_bond_dist=0.05;
1133 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1134 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1135 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1136 static const char *ff = "select";
1139 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1140 "HIDDENWrite the residue database in new format to 'new.rtp'"},
1141 { "-lb", FALSE, etREAL, {&long_bond_dist},
1142 "HIDDENLong bond warning distance" },
1143 { "-sb", FALSE, etREAL, {&short_bond_dist},
1144 "HIDDENShort bond warning distance" },
1145 { "-chainsep", FALSE, etENUM, {chainsep},
1146 "Condition in PDB files when a new chain and molecule_type should be started" },
1147 { "-ff", FALSE, etSTR, {&ff},
1148 "Force field, interactive by default. Use -h for information." },
1149 { "-water", FALSE, etENUM, {watstr},
1150 "Water model to use" },
1151 { "-inter", FALSE, etBOOL, {&bInter},
1152 "Set the next 8 options to interactive"},
1153 { "-ss", FALSE, etBOOL, {&bCysMan},
1154 "Interactive SS bridge selection" },
1155 { "-ter", FALSE, etBOOL, {&bTerMan},
1156 "Interactive termini selection, iso charged" },
1157 { "-lys", FALSE, etBOOL, {&bLysMan},
1158 "Interactive Lysine selection, iso charged" },
1159 { "-arg", FALSE, etBOOL, {&bArgMan},
1160 "Interactive Arganine selection, iso charged" },
1161 { "-asp", FALSE, etBOOL, {&bAspMan},
1162 "Interactive Aspartic Acid selection, iso charged" },
1163 { "-glu", FALSE, etBOOL, {&bGluMan},
1164 "Interactive Glutamic Acid selection, iso charged" },
1165 { "-gln", FALSE, etBOOL, {&bGlnMan},
1166 "Interactive Glutamine selection, iso neutral" },
1167 { "-his", FALSE, etBOOL, {&bHisMan},
1168 "Interactive Histidine selection, iso checking H-bonds" },
1169 { "-angle", FALSE, etREAL, {&angle},
1170 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1171 { "-dist", FALSE, etREAL, {&distance},
1172 "Maximum donor-acceptor distance for a H-bond (nm)" },
1173 { "-una", FALSE, etBOOL, {&bUnA},
1174 "Select aromatic rings with united CH atoms on Phenylalanine, "
1175 "Tryptophane and Tyrosine" },
1176 { "-sort", FALSE, etBOOL, {&bSort},
1177 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1178 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1179 "Ignore hydrogen atoms that are in the pdb file" },
1180 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1181 "Continue when atoms are missing, dangerous" },
1182 { "-v", FALSE, etBOOL, {&bVerbose},
1183 "Be slightly more verbose in messages" },
1184 { "-posrefc",FALSE, etREAL, {&posre_fc},
1185 "Force constant for position restraints" },
1186 { "-vsite", FALSE, etENUM, {vsitestr},
1187 "Convert atoms to virtual sites" },
1188 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1189 "Make hydrogen atoms heavy" },
1190 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1191 "Change the mass of hydrogens to 2 amu" },
1192 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1193 "Use charge groups in the rtp file" },
1194 { "-cmap", TRUE, etBOOL, {&bCmap},
1195 "Use cmap torsions (if enabled in the rtp file)" },
1196 { "-renum", TRUE, etBOOL, {&bRenumRes},
1197 "Renumber the residues consecutively in the output" },
1198 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1199 "Use rtp entry names as residue names" }
1201 #define NPARGS asize(pa)
1203 CopyRight(stderr,argv[0]);
1204 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1207 /* Force field selection, interactive or direct */
1208 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1209 forcefield,sizeof(forcefield),
1210 ffdir,sizeof(ffdir));
1212 if (strlen(forcefield) > 0) {
1213 strcpy(ffname,forcefield);
1214 ffname[0] = toupper(ffname[0]);
1216 gmx_fatal(FARGS,"Empty forcefield string");
1219 printf("\nUsing the %s force field in directory %s\n\n",
1222 choose_watermodel(watstr[0],ffdir,&watermodel);
1225 /* if anything changes here, also change description of -inter */
1238 else if (bDeuterate)
1243 switch(vsitestr[0][0]) {
1244 case 'n': /* none */
1246 bVsiteAromatics=FALSE;
1248 case 'h': /* hydrogens */
1250 bVsiteAromatics=FALSE;
1252 case 'a': /* aromatics */
1254 bVsiteAromatics=TRUE;
1257 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1258 __FILE__,__LINE__,vsitestr[0]);
1261 /* Open the symbol table */
1262 open_symtab(&symtab);
1264 /* Residue type database */
1265 gmx_residuetype_init(&rt);
1267 /* Read residue renaming database(s), if present */
1268 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1272 for(i=0; i<nrrn; i++) {
1273 fp = fflib_open(rrn[i]);
1274 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1280 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1282 for(i=0;i<nrtprename;i++)
1284 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1286 /* Only add names if the 'standard' gromacs/iupac base name was found */
1289 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1290 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1291 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1292 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1297 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1298 strstr(watermodel,"4P"))) {
1300 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1301 strstr(watermodel,"5P"))) {
1307 aps = gmx_atomprop_init();
1308 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1309 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1313 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1315 printf("Analyzing pdb file\n");
1320 modify_chain_numbers(&pdba_all,chainsep[0]);
1323 bMerge = !strncmp(chainsep[0],"int",3);
1325 this_atomname = NULL;
1327 this_resname = NULL;
1330 this_chainnumber = -1;
1331 this_chainstart = 0;
1334 for (i=0; (i<natom); i++)
1336 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1338 prev_atomname = this_atomname;
1339 prev_atomnum = this_atomnum;
1340 prev_resname = this_resname;
1341 prev_resnum = this_resnum;
1342 prev_chainid = this_chainid;
1343 prev_chainnumber = this_chainnumber;
1344 prev_chainstart = this_chainstart;
1346 this_atomname = *pdba_all.atomname[i];
1347 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1348 this_resname = *ri->name;
1349 this_resnum = ri->nr;
1350 this_chainid = ri->chainid;
1351 this_chainnumber = ri->chainnum;
1353 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1354 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1356 this_chainstart = pdba_all.atom[i].resind;
1357 if (bMerge && i>0 && !bWat)
1359 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) with\n"
1360 "chain starting with residue %s%d (chain id '%c', atom %d %s)? [n/y]\n",
1361 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1362 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1364 if(NULL==fgets(select,STRLEN-1,stdin))
1366 gmx_fatal(FARGS,"Error reading from stdin");
1374 if (select[0] == 'y')
1376 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1377 pdba_all.atom[i].resind - prev_chainstart;
1378 pdb_ch[nch-1].nterpairs++;
1379 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1383 /* set natom for previous chain */
1386 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1393 /* check if chain identifier was used before */
1394 for (j=0; (j<nch); j++)
1396 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1398 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1399 "They will be treated as separate chains unless you reorder your file.\n",
1406 srenew(pdb_ch,maxch);
1408 pdb_ch[nch].chainid = ri->chainid;
1409 pdb_ch[nch].chainnum = ri->chainnum;
1410 pdb_ch[nch].start=i;
1411 pdb_ch[nch].bAllWat=bWat;
1413 pdb_ch[nch].nterpairs=0;
1415 pdb_ch[nch].nterpairs=1;
1416 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1417 /* modified [nch] to [0] below */
1418 pdb_ch[nch].chainstart[0]=0;
1424 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1426 /* set all the water blocks at the end of the chain */
1427 snew(swap_index,nch);
1429 for(i=0; i<nch; i++)
1430 if (!pdb_ch[i].bAllWat) {
1434 for(i=0; i<nch; i++)
1435 if (pdb_ch[i].bAllWat) {
1440 printf("Moved all the water blocks to the end\n");
1443 /* copy pdb data and x for all chains */
1444 for (i=0; (i<nch); i++) {
1446 chains[i].chainid = pdb_ch[si].chainid;
1447 chains[i].chainnum = pdb_ch[si].chainnum;
1448 chains[i].bAllWat = pdb_ch[si].bAllWat;
1449 chains[i].nterpairs = pdb_ch[si].nterpairs;
1450 chains[i].chainstart = pdb_ch[si].chainstart;
1451 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1452 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1453 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1454 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1456 snew(chains[i].pdba,1);
1457 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1458 snew(chains[i].x,chains[i].pdba->nr);
1459 for (j=0; j<chains[i].pdba->nr; j++) {
1460 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1461 snew(chains[i].pdba->atomname[j],1);
1462 *chains[i].pdba->atomname[j] =
1463 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1464 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1465 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1467 /* Re-index the residues assuming that the indices are continuous */
1468 k = chains[i].pdba->atom[0].resind;
1469 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1470 chains[i].pdba->nres = nres;
1471 for(j=0; j < chains[i].pdba->nr; j++) {
1472 chains[i].pdba->atom[j].resind -= k;
1474 srenew(chains[i].pdba->resinfo,nres);
1475 for(j=0; j<nres; j++) {
1476 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1477 snew(chains[i].pdba->resinfo[j].name,1);
1478 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1479 /* make all chain identifiers equal to that of the chain */
1480 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1485 printf("\nMerged %d chains into one molecule definition\n\n",
1486 pdb_ch[0].nterpairs);
1488 printf("There are %d chains and %d blocks of water and "
1489 "%d residues with %d atoms\n",
1490 nch-nwaterchain,nwaterchain,
1491 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1493 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1494 for (i=0; (i<nch); i++)
1495 printf(" %d '%c' %5d %6d %s\n",
1496 i+1, chains[i].chainid ? chains[i].chainid:'-',
1497 chains[i].pdba->nres, chains[i].pdba->nr,
1498 chains[i].bAllWat ? "(only water)":"");
1501 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1503 /* Read atomtypes... */
1504 atype = read_atype(ffdir,&symtab);
1506 /* read residue database */
1507 printf("Reading residue database... (%s)\n",forcefield);
1508 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1511 for(i=0; i<nrtpf; i++) {
1512 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1517 /* Not correct with multiple rtp input files with different bonded types */
1518 fp=gmx_fio_fopen("new.rtp","w");
1519 print_resall(fp,nrtp,restp,atype);
1523 /* read hydrogen database */
1524 nah = read_h_db(ffdir,&ah);
1526 /* Read Termini database... */
1527 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1528 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1530 top_fn=ftp2fn(efTOP,NFILE,fnm);
1531 top_file=gmx_fio_fopen(top_fn,"w");
1533 #ifdef PACKAGE_VERSION
1534 sprintf(generator,"%s - version %s",ShortProgram(), PACKAGE_VERSION );
1536 sprintf(generator,"%s - version %s",ShortProgram(), "unknown" );
1538 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1545 for(chain=0; (chain<nch); chain++) {
1546 cc = &(chains[chain]);
1548 /* set pdba, natom and nres to the current chain */
1552 nres =cc->pdba->nres;
1554 if (cc->chainid && ( cc->chainid != ' ' ) )
1555 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1556 chain+1,cc->chainid,natom,nres);
1558 printf("Processing chain %d (%d atoms, %d residues)\n",
1559 chain+1,natom,nres);
1561 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1562 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1563 nrtprename,rtprename);
1565 for(i=0; i<cc->nterpairs; i++) {
1567 cc->chainstart[cc->nterpairs] = pdba->nres;
1569 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1570 &(cc->r_start[i]),&(cc->r_end[i]),rt);
1573 if ( (cc->r_start[i]<0) || (cc->r_end[i]<0) ) {
1574 printf("Problem with chain definition, or missing terminal residues.\n"
1575 "This chain does not appear to contain a recognized chain molecule.\n"
1576 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1583 /* Check for disulfides and other special bonds */
1584 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1586 if (nrtprename > 0) {
1587 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1593 sprintf(fn,"chain.pdb");
1595 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1597 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1601 for(i=0; i<cc->nterpairs; i++)
1605 * We first apply a filter so we only have the
1606 * termini that can be applied to the residue in question
1607 * (or a generic terminus if no-residue specific is available).
1609 /* First the N terminus */
1612 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1613 *pdba->resinfo[cc->r_start[i]].name,
1614 *pdba->resinfo[cc->r_start[i]].rtp,
1618 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1619 "is already in a terminus-specific form and skipping terminus selection.\n");
1624 if(bTerMan && ntdblist>1)
1626 cc->ntdb[i] = choose_ter(ntdblist,tdblist,"Select start terminus type");
1630 cc->ntdb[i] = tdblist[0];
1633 printf("Start terminus: %s\n",(cc->ntdb[i])->name);
1642 /* And the C terminus */
1645 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1646 *pdba->resinfo[cc->r_end[i]].name,
1647 *pdba->resinfo[cc->r_end[i]].rtp,
1651 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1652 "is already in a terminus-specific form and skipping terminus selection.\n");
1657 if(bTerMan && ntdblist>1)
1659 cc->ctdb[i] = choose_ter(ntdblist,tdblist,"Select end terminus type");
1663 cc->ctdb[i] = tdblist[0];
1665 printf("End terminus: %s\n",(cc->ctdb[i])->name);
1674 /* lookup hackblocks and rtp for all residues */
1675 get_hackblocks_rtp(&hb_chain, &restp_chain,
1676 nrtp, restp, pdba->nres, pdba->resinfo,
1677 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, ffname);
1678 /* ideally, now we would not need the rtp itself anymore, but do
1679 everything using the hb and restp arrays. Unfortunately, that
1680 requires some re-thinking of code in gen_vsite.c, which I won't
1681 do now :( AF 26-7-99 */
1683 rename_atoms(NULL,ffdir,
1684 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1686 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1689 block = new_blocka();
1691 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1692 natom,&pdba,&x,block,&gnames);
1693 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1694 if (ftp2bSet(efNDX,NFILE,fnm)) {
1696 fprintf(stderr,"WARNING: with the -remh option the generated "
1697 "index file (%s) might be useless\n"
1698 "(the index file is generated before hydrogens are added)",
1699 ftp2fn(efNDX,NFILE,fnm));
1701 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1703 for(i=0; i < block->nr; i++)
1708 fprintf(stderr,"WARNING: "
1709 "without sorting no check for duplicate atoms can be done\n");
1712 /* Generate Hydrogen atoms (and termini) in the sequence */
1713 natom=add_h(&pdba,&x,nah,ah,
1714 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1715 NULL,NULL,TRUE,FALSE);
1716 printf("Now there are %d residues with %d atoms\n",
1717 pdba->nres,pdba->nr);
1718 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1721 for(i=0; (i<natom); i++)
1722 fprintf(debug,"Res %s%d atom %d %s\n",
1723 *(pdba->resinfo[pdba->atom[i].resind].name),
1724 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1726 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1728 /* make up molecule name(s) */
1730 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1732 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1738 sprintf(molname,"Water");
1742 this_chainid = cc->chainid;
1744 /* Add the chain id if we have one */
1745 if(this_chainid != ' ')
1747 sprintf(buf,"_chain_%c",this_chainid);
1751 /* Check if there have been previous chains with the same id */
1753 for(k=0;k<chain;k++)
1755 if(cc->chainid == chains[k].chainid)
1760 /* Add the number for this chain identifier if there are multiple copies */
1764 sprintf(buf,"%d",nid_used+1);
1768 if(strlen(suffix)>0)
1770 sprintf(molname,"%s%s",p_restype,suffix);
1774 strcpy(molname,p_restype);
1778 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1780 strcpy(itp_fn,top_fn);
1781 printf("Chain time...\n");
1782 c=strrchr(itp_fn,'.');
1783 sprintf(c,"_%s.itp",molname);
1784 c=strrchr(posre_fn,'.');
1785 sprintf(c,"_%s.itp",molname);
1786 if (strcmp(itp_fn,posre_fn) == 0) {
1787 strcpy(buf_fn,posre_fn);
1788 c = strrchr(buf_fn,'.');
1790 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1794 srenew(incls,nincl);
1795 incls[nincl-1]=strdup(itp_fn);
1796 itp_file=gmx_fio_fopen(itp_fn,"w");
1800 srenew(mols,nmol+1);
1802 mols[nmol].name = strdup("SOL");
1803 mols[nmol].nr = pdba->nres;
1805 mols[nmol].name = strdup(molname);
1811 print_top_comment(itp_file,itp_fn,generator,TRUE);
1821 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1823 restp_chain,hb_chain,
1824 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1825 bVsites,bVsiteAromatics,forcefield,ffdir,
1826 mHmult,nssbonds,ssbonds,
1827 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1828 bRenumRes,bRTPresname);
1831 write_posres(posre_fn,pdba,posre_fc);
1834 gmx_fio_fclose(itp_file);
1836 /* pdba and natom have been reassigned somewhere so: */
1841 if (cc->chainid == ' ')
1842 sprintf(fn,"chain.pdb");
1844 sprintf(fn,"chain_%c.pdb",cc->chainid);
1845 cool_quote(quote,255,NULL);
1846 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1850 if (watermodel == NULL) {
1851 for(chain=0; chain<nch; chain++) {
1852 if (chains[chain].bAllWat) {
1853 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1857 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1858 if (!fflib_fexist(buf_fn)) {
1859 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1864 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1865 gmx_fio_fclose(top_file);
1867 gmx_residuetype_destroy(rt);
1869 /* now merge all chains back together */
1872 for (i=0; (i<nch); i++) {
1873 natom+=chains[i].pdba->nr;
1874 nres+=chains[i].pdba->nres;
1877 init_t_atoms(atoms,natom,FALSE);
1878 for(i=0; i < atoms->nres; i++)
1879 sfree(atoms->resinfo[i].name);
1880 sfree(atoms->resinfo);
1882 snew(atoms->resinfo,nres);
1886 for (i=0; (i<nch); i++) {
1888 printf("Including chain %d in system: %d atoms %d residues\n",
1889 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1890 for (j=0; (j<chains[i].pdba->nr); j++) {
1891 atoms->atom[k]=chains[i].pdba->atom[j];
1892 atoms->atom[k].resind += l; /* l is processed nr of residues */
1893 atoms->atomname[k]=chains[i].pdba->atomname[j];
1894 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1895 copy_rvec(chains[i].x[j],x[k]);
1898 for (j=0; (j<chains[i].pdba->nres); j++) {
1899 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1901 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
1908 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
1909 print_sums(atoms, TRUE);
1912 fprintf(stderr,"\nWriting coordinate file...\n");
1913 clear_rvec(box_space);
1915 gen_box(0,atoms->nr,x,box,box_space,FALSE);
1916 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
1918 printf("\t\t--------- PLEASE NOTE ------------\n");
1919 printf("You have successfully generated a topology from: %s.\n",
1920 opt2fn("-f",NFILE,fnm));
1921 if (watermodel != NULL) {
1922 printf("The %s force field and the %s water model are used.\n",
1925 printf("The %s force field is used.\n",
1928 printf("\t\t--------- ETON ESAELP ------------\n");