1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
85 static const char *res2bb_notermini(const char *name,
86 int nrr,const rtprename_t *rr)
88 /* NOTE: This function returns the main building block name,
89 * it does not take terminal renaming into account.
94 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
98 return (i < nrr ? rr[i].main : name);
101 static const char *select_res(int nr,int resnr,
102 const char *name[],const char *expl[],
104 int nrr,const rtprename_t *rr)
108 printf("Which %s type do you want for residue %d\n",title,resnr+1);
109 for(sel=0; (sel < nr); sel++) {
110 printf("%d. %s (%s)\n",
111 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
113 printf("\nType a number:"); fflush(stdout);
115 if (scanf("%d",&sel) != 1)
116 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
121 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
123 enum { easp, easpH, easpNR };
124 const char *lh[easpNR] = { "ASP", "ASPH" };
125 const char *expl[easpNR] = {
126 "Not protonated (charge -1)",
127 "Protonated (charge 0)"
130 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
133 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
135 enum { eglu, egluH, egluNR };
136 const char *lh[egluNR] = { "GLU", "GLUH" };
137 const char *expl[egluNR] = {
138 "Not protonated (charge -1)",
139 "Protonated (charge 0)"
142 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
145 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
147 enum { egln, eglnH, eglnNR };
148 const char *lh[eglnNR] = { "GLN", "QLN" };
149 const char *expl[eglnNR] = {
150 "Not protonated (charge 0)",
151 "Protonated (charge +1)"
154 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
157 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
159 enum { elys, elysH, elysNR };
160 const char *lh[elysNR] = { "LYSN", "LYS" };
161 const char *expl[elysNR] = {
162 "Not protonated (charge 0)",
163 "Protonated (charge +1)"
166 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
169 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
171 enum { earg, eargH, eargNR };
172 const char *lh[eargNR] = { "ARGN", "ARG" };
173 const char *expl[eargNR] = {
174 "Not protonated (charge 0)",
175 "Protonated (charge +1)"
178 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
181 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
183 const char *expl[ehisNR] = {
190 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
193 static void read_rtprename(const char *fname,FILE *fp,
194 int *nrtprename,rtprename_t **rtprename)
196 char line[STRLEN],buf[STRLEN];
205 while(get_a_line(fp,line,STRLEN)) {
207 nc = sscanf(line,"%s %s %s %s %s %s",
208 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
210 if (nc != 2 && nc != 5) {
211 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
214 } else if (nc != ncol) {
215 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
219 /* This file does not have special termini names, copy them from main */
220 strcpy(rr[n].nter,rr[n].main);
221 strcpy(rr[n].cter,rr[n].main);
222 strcpy(rr[n].bter,rr[n].main);
232 static char *search_resrename(int nrr,rtprename_t *rr,
234 gmx_bool bStart,gmx_bool bEnd,
235 gmx_bool bCompareFFRTPname)
243 while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx) != 0) ||
244 ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
249 /* If found in the database, rename this residue's rtp building block,
250 * otherwise keep the old name.
273 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
281 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
282 int nrr,rtprename_t *rr,t_symtab *symtab,
286 gmx_bool bStart,bEnd;
288 gmx_bool bFFRTPTERRNM;
290 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
292 for(r=0; r<pdba->nres; r++)
296 for(j=0; j<nterpairs; j++)
303 for(j=0; j<nterpairs; j++)
311 nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
313 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
315 /* This is a terminal residue, but the residue name,
316 * currently stored in .rtp, is not a standard residue name,
317 * but probably a force field specific rtp name.
318 * Check if we need to rename it because it is terminal.
320 nn = search_resrename(nrr,rr,
321 *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
324 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
328 printf("Changing rtp entry of residue %d %s to '%s'\n",
329 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
331 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
336 static void pdbres_to_gmxrtp(t_atoms *pdba)
340 for(i=0; (i<pdba->nres); i++)
342 if (pdba->resinfo[i].rtp == NULL)
344 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
349 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
350 gmx_bool bFullCompare,t_symtab *symtab)
355 for(i=0; (i<pdba->nres); i++)
357 resnm = *pdba->resinfo[i].name;
358 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
359 (!bFullCompare && strstr(resnm,oldnm) != NULL))
361 /* Rename the residue name (not the rtp name) */
362 pdba->resinfo[i].name = put_symtab(symtab,newnm);
367 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
368 gmx_bool bFullCompare,t_symtab *symtab)
373 for(i=0; (i<pdba->nres); i++)
375 /* We have not set the rtp name yes, use the residue name */
376 bbnm = *pdba->resinfo[i].name;
377 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
378 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
380 /* Change the rtp builing block name */
381 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
386 static void rename_bbint(t_atoms *pdba,const char *oldnm,
387 const char *gettp(int,int,const rtprename_t *),
388 gmx_bool bFullCompare,
390 int nrr,const rtprename_t *rr)
396 for(i=0; i<pdba->nres; i++)
398 /* We have not set the rtp name yes, use the residue name */
399 bbnm = *pdba->resinfo[i].name;
400 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
401 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
403 ptr = gettp(i,nrr,rr);
404 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
409 static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
415 ftp = fn2ftp(filename);
416 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
417 fprintf(stderr,"No occupancies in %s\n",filename);
419 for(i=0; (i<atoms->nr); i++) {
420 if (atoms->pdbinfo[i].occup != 1) {
422 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
423 *atoms->resinfo[atoms->atom[i].resind].name,
424 atoms->resinfo[ atoms->atom[i].resind].nr,
426 atoms->pdbinfo[i].occup);
427 if (atoms->pdbinfo[i].occup == 0)
433 if (nzero == atoms->nr) {
434 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
435 } else if ((nzero > 0) || (nnotone > 0)) {
438 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
439 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
441 nzero,nnotone,atoms->nr);
443 fprintf(stderr,"All occupancies are one\n");
448 void write_posres(char *fn,t_atoms *pdba,real fc)
453 fp=gmx_fio_fopen(fn,"w");
455 "; In this topology include file, you will find position restraint\n"
456 "; entries for all the heavy atoms in your original pdb file.\n"
457 "; This means that all the protons which were added by pdb2gmx are\n"
458 "; not restrained.\n"
460 "[ position_restraints ]\n"
461 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
463 for(i=0; (i<pdba->nr); i++) {
464 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
465 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
470 static int read_pdball(const char *inf, const char *outf,char *title,
471 t_atoms *atoms, rvec **x,
472 int *ePBC,matrix box, gmx_bool bRemoveH,
473 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
474 gmx_atomprop_t aps,gmx_bool bVerbose)
475 /* Read a pdb file. (containing proteins) */
477 int natom,new_natom,i;
480 printf("Reading %s...\n",inf);
481 get_stx_coordnum(inf,&natom);
482 init_t_atoms(atoms,natom,TRUE);
484 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
485 if (fn2ftp(inf) == efPDB)
486 get_pdb_atomnumber(atoms,aps);
489 for(i=0; i<atoms->nr; i++)
490 if (!is_hydrogen(*atoms->atomname[i])) {
491 atoms->atom[new_natom]=atoms->atom[i];
492 atoms->atomname[new_natom]=atoms->atomname[i];
493 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
494 copy_rvec((*x)[i],(*x)[new_natom]);
502 if (title && title[0])
503 printf(" '%s',",title);
504 printf(" %d atoms\n",natom);
506 /* Rename residues */
507 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
508 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
509 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
511 rename_atoms("xlateat.dat",NULL,
512 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
518 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
523 void process_chain(t_atoms *pdba, rvec *x,
524 gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
525 gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
526 gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
527 real angle,real distance,t_symtab *symtab,
528 int nrr,const rtprename_t *rr)
530 /* Rename aromatics, lys, asp and histidine */
531 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
532 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
533 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
535 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
537 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
539 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
541 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
543 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
545 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
547 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
550 set_histp(pdba,x,angle,distance);
552 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
554 /* Initialize the rtp builing block names with the residue names
555 * for the residues that have not been processed above.
557 pdbres_to_gmxrtp(pdba);
559 /* Now we have all rtp names set.
560 * The rtp names will conform to Gromacs naming,
561 * unless the input pdb file contained one or more force field specific
562 * rtp names as residue names.
566 /* struct for sorting the atoms from the pdb file */
568 int resnr; /* residue number */
569 int j; /* database order index */
570 int index; /* original atom number */
571 char anm1; /* second letter of atom name */
572 char altloc; /* alternate location indicator */
575 int pdbicomp(const void *a,const void *b)
583 d = (pa->resnr - pb->resnr);
587 d = (pa->anm1 - pb->anm1);
589 d = (pa->altloc - pb->altloc);
596 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
597 int natoms,t_atoms **pdbaptr,rvec **x,
598 t_blocka *block,char ***gnames)
600 t_atoms *pdba,*pdbnew;
615 for(i=0; i<natoms; i++)
617 atomnm = *pdba->atomname[i];
618 rptr = &restp[pdba->atom[i].resind];
619 for(j=0; (j<rptr->natom); j++)
621 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
631 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
632 "while sorting atoms.\n%s",atomnm,
633 *pdba->resinfo[pdba->atom[i].resind].name,
634 pdba->resinfo[pdba->atom[i].resind].nr,
637 is_hydrogen(atomnm) ?
638 "\nFor a hydrogen, this can be a different protonation state, or it\n"
639 "might have had a different number in the PDB file and was rebuilt\n"
640 "(it might for instance have been H3, and we only expected H1 & H2).\n"
641 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
642 "Remove this hydrogen or choose a different protonation state to solve it.\n"
643 "Option -ignh will ignore all hydrogens in the input." : ".");
644 gmx_fatal(FARGS,buf);
646 /* make shadow array to be sorted into indexgroup */
647 pdbi[i].resnr = pdba->atom[i].resind;
650 pdbi[i].anm1 = atomnm[1];
651 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
653 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
655 /* pdba is sorted in pdbnew using the pdbi index */
658 init_t_atoms(pdbnew,natoms,TRUE);
661 pdbnew->nres=pdba->nres;
662 sfree(pdbnew->resinfo);
663 pdbnew->resinfo=pdba->resinfo;
664 for (i=0; i<natoms; i++) {
665 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
666 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
667 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
668 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
669 /* make indexgroup in block */
673 sfree(pdba->atomname);
675 sfree(pdba->pdbinfo);
678 /* copy the sorted pdbnew back to pdba */
681 add_grp(block, gnames, natoms, a, "prot_sort");
687 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
689 int i,j,oldnatoms,ndel;
692 printf("Checking for duplicate atoms....\n");
693 oldnatoms = pdba->nr;
695 /* NOTE: pdba->nr is modified inside the loop */
696 for(i=1; (i < pdba->nr); i++) {
697 /* compare 'i' and 'i-1', throw away 'i' if they are identical
698 this is a 'while' because multiple alternate locations can be present */
699 while ( (i < pdba->nr) &&
700 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
701 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
704 ri = &pdba->resinfo[pdba->atom[i].resind];
705 printf("deleting duplicate atom %4s %s%4d%c",
706 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
707 if (ri->chainid && (ri->chainid != ' '))
708 printf(" ch %c", ri->chainid);
710 if (pdba->pdbinfo[i].atomnr)
711 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
712 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
713 printf(" altloc %c",pdba->pdbinfo[i].altloc);
718 /* We can not free, since it might be in the symtab */
719 /* sfree(pdba->atomname[i]); */
720 for (j=i; j < pdba->nr; j++) {
721 pdba->atom[j] = pdba->atom[j+1];
722 pdba->atomname[j] = pdba->atomname[j+1];
723 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
724 copy_rvec(x[j+1],x[j]);
726 srenew(pdba->atom, pdba->nr);
727 /* srenew(pdba->atomname, pdba->nr); */
728 srenew(pdba->pdbinfo, pdba->nr);
731 if (pdba->nr != oldnatoms)
732 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
737 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
740 const char *p_startrestype;
741 const char *p_restype;
742 int nstartwarn,nendwarn;
750 /* Find the starting terminus (typially N or 5') */
751 for(i=r0;i<r1 && *r_start==-1;i++)
753 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
754 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
756 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
763 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
767 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
775 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
776 for(i=*r_start;i<r1;i++)
778 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
779 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
787 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
788 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
789 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
793 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
802 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
808 modify_chain_numbers(t_atoms * pdba,
809 const char * chainsep)
812 char old_prev_chainid;
813 char old_this_chainid;
814 int old_prev_chainnum;
815 int old_this_chainnum;
821 const char * prev_atomname;
822 const char * this_atomname;
823 const char * prev_resname;
824 const char * this_resname;
829 int prev_chainnumber;
830 int this_chainnumber;
842 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
844 /* Be a bit flexible to catch typos */
845 if (!strncmp(chainsep,"id_o",4))
847 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
848 splitting = SPLIT_ID_OR_TER;
849 printf("Splitting chemical chains based on TER records or chain id changing.\n");
851 else if (!strncmp(chainsep,"int",3))
853 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
854 splitting = SPLIT_INTERACTIVE;
855 printf("Splitting chemical chains interactively.\n");
857 else if (!strncmp(chainsep,"id_a",4))
859 splitting = SPLIT_ID_AND_TER;
860 printf("Splitting chemical chains based on TER records and chain id changing.\n");
862 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
864 splitting = SPLIT_ID_ONLY;
865 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
867 else if (chainsep[0]=='t')
869 splitting = SPLIT_TER_ONLY;
870 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
874 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
877 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
879 old_prev_chainid = '?';
880 old_prev_chainnum = -1;
883 this_atomname = NULL;
888 this_chainnumber = -1;
890 for(i=0;i<pdba->nres;i++)
892 ri = &pdba->resinfo[i];
893 old_this_chainid = ri->chainid;
894 old_this_chainnum = ri->chainnum;
896 prev_atomname = this_atomname;
897 prev_atomnum = this_atomnum;
898 prev_resname = this_resname;
899 prev_resnum = this_resnum;
900 prev_chainid = this_chainid;
901 prev_chainnumber = this_chainnumber;
903 this_atomname = *(pdba->atomname[i]);
904 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
905 this_resname = *ri->name;
906 this_resnum = ri->nr;
907 this_chainid = ri->chainid;
908 this_chainnumber = ri->chainnum;
912 case SPLIT_ID_OR_TER:
913 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
919 case SPLIT_ID_AND_TER:
920 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
927 if(old_this_chainid != old_prev_chainid)
934 if(old_this_chainnum != old_prev_chainnum)
939 case SPLIT_INTERACTIVE:
940 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
944 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
945 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
946 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
947 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
949 if(NULL==fgets(select,STRLEN-1,stdin))
951 gmx_fatal(FARGS,"Error reading from stdin");
954 if(i==0 || select[0] == 'y')
961 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
964 old_prev_chainid = old_this_chainid;
965 old_prev_chainnum = old_this_chainnum;
967 ri->chainnum = new_chainnum;
996 int cmain(int argc, char *argv[])
998 const char *desc[] = {
999 "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1000 "some database files, adds hydrogens to the molecules and generates",
1001 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1002 "and a topology in GROMACS format.",
1003 "These files can subsequently be processed to generate a run input file.",
1005 "[TT]pdb2gmx[tt] will search for force fields by looking for",
1006 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1007 "of the current working directory and of the GROMACS library directory",
1008 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1010 "By default the forcefield selection is interactive,",
1011 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1012 "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
1013 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1015 "After choosing a force field, all files will be read only from",
1016 "the corresponding force field directory.",
1017 "If you want to modify or add a residue types, you can copy the force",
1018 "field directory from the GROMACS library directory to your current",
1019 "working directory. If you want to add new protein residue types,",
1020 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1021 "or copy the whole library directory to a local directory and set",
1022 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1023 "Check Chapter 5 of the manual for more information about file formats.",
1026 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1027 "need not necessarily contain a protein structure. Every kind of",
1028 "molecule for which there is support in the database can be converted.",
1029 "If there is no support in the database, you can add it yourself.[PAR]",
1031 "The program has limited intelligence, it reads a number of database",
1032 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1033 "if necessary this can be done manually. The program can prompt the",
1034 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1035 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1036 "protonated (three protons, default), for Asp and Glu unprotonated",
1037 "(default) or protonated, for His the proton can be either on ND1,",
1038 "on NE2 or on both. By default these selections are done automatically.",
1039 "For His, this is based on an optimal hydrogen bonding",
1040 "conformation. Hydrogen bonds are defined based on a simple geometric",
1041 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1042 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1043 "[TT]-dist[tt] respectively.[PAR]",
1045 "The protonation state of N- and C-termini can be chosen interactively",
1046 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1047 "respectively. Some force fields support zwitterionic forms for chains of",
1048 "one residue, but for polypeptides these options should NOT be selected.",
1049 "The AMBER force fields have unique forms for the terminal residues,",
1050 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1051 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1052 "respectively to use these forms, making sure you preserve the format",
1053 "of the coordinate file. Alternatively, use named terminating residues",
1054 "(e.g. ACE, NME).[PAR]",
1056 "The separation of chains is not entirely trivial since the markup",
1057 "in user-generated PDB files frequently varies and sometimes it",
1058 "is desirable to merge entries across a TER record, for instance",
1059 "if you want a disulfide bridge or distance restraints between",
1060 "two protein chains or if you have a HEME group bound to a protein.",
1061 "In such cases multiple chains should be contained in a single",
1062 "[TT]moleculetype[tt] definition.",
1063 "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
1064 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1065 "start, and termini added when applicable. This can be done based on the",
1066 "existence of TER records, when the chain id changes, or combinations of either",
1067 "or both of these. You can also do the selection fully interactively.",
1068 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1069 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1070 "This can be turned off (no merging), all non-water chains can be merged into a",
1071 "single molecule, or the selection can be done interactively.[PAR]",
1073 "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
1074 "If any of the occupancies are not one, indicating that the atom is",
1075 "not resolved well in the structure, a warning message is issued.",
1076 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1077 "all occupancy fields may be zero. Either way, it is up to the user",
1078 "to verify the correctness of the input data (read the article!).[PAR]",
1080 "During processing the atoms will be reordered according to GROMACS",
1081 "conventions. With [TT]-n[tt] an index file can be generated that",
1082 "contains one group reordered in the same way. This allows you to",
1083 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1084 "one limitation: reordering is done after the hydrogens are stripped",
1085 "from the input and before new hydrogens are added. This means that",
1086 "you should not use [TT]-ignh[tt].[PAR]",
1088 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1089 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1090 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1093 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1094 "motions. Angular and out-of-plane motions can be removed by changing",
1095 "hydrogens into virtual sites and fixing angles, which fixes their",
1096 "position relative to neighboring atoms. Additionally, all atoms in the",
1097 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1098 "can be converted into virtual sites, eliminating the fast improper dihedral",
1099 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1100 "atoms are also converted to virtual sites. The mass of all atoms that are",
1101 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1102 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1103 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1104 "done for water hydrogens to slow down the rotational motion of water.",
1105 "The increase in mass of the hydrogens is subtracted from the bonded",
1106 "(heavy) atom so that the total mass of the system remains the same."
1110 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
1112 t_atoms pdba_all,*pdba;
1116 int chain,nch,maxch,nwaterchain;
1118 t_chain *chains,*cc;
1119 char select[STRLEN];
1132 gpp_atomtype_t atype;
1133 gmx_residuetype_t rt;
1135 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1136 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1137 char *c,forcefield[STRLEN],ffdir[STRLEN];
1138 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1147 rtprename_t *rtprename=NULL;
1148 int nah,nNtdb,nCtdb,ntdblist;
1149 t_hackblock *ntdb,*ctdb,**tdblist;
1153 gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
1155 t_hackblock *hb_chain;
1156 t_restp *restp_chain;
1158 const char *p_restype;
1162 const char * prev_atomname;
1163 const char * this_atomname;
1164 const char * prev_resname;
1165 const char * this_resname;
1170 int prev_chainnumber;
1171 int this_chainnumber;
1173 int this_chainstart;
1174 int prev_chainstart;
1181 { efSTX, "-f", "eiwit.pdb", ffREAD },
1182 { efSTO, "-o", "conf", ffWRITE },
1183 { efTOP, NULL, NULL, ffWRITE },
1184 { efITP, "-i", "posre", ffWRITE },
1185 { efNDX, "-n", "clean", ffOPTWR },
1186 { efSTO, "-q", "clean.pdb", ffOPTWR }
1188 #define NFILE asize(fnm)
1191 /* Command line arguments must be static */
1192 static gmx_bool bNewRTP=FALSE;
1193 static gmx_bool bInter=FALSE, bCysMan=FALSE;
1194 static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1195 static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
1196 static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1197 static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1198 static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1199 static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
1200 static real angle=135.0, distance=0.3,posre_fc=1000;
1201 static real long_bond_dist=0.25, short_bond_dist=0.05;
1202 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1203 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1204 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1205 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1206 static const char *ff = "select";
1209 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1210 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1211 { "-lb", FALSE, etREAL, {&long_bond_dist},
1212 "HIDDENLong bond warning distance" },
1213 { "-sb", FALSE, etREAL, {&short_bond_dist},
1214 "HIDDENShort bond warning distance" },
1215 { "-chainsep", FALSE, etENUM, {chainsep},
1216 "Condition in PDB files when a new chain should be started (adding termini)" },
1217 { "-merge", FALSE, etENUM, {&merge},
1218 "Merge multiple chains into a single [moleculetype]" },
1219 { "-ff", FALSE, etSTR, {&ff},
1220 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1221 { "-water", FALSE, etENUM, {watstr},
1222 "Water model to use" },
1223 { "-inter", FALSE, etBOOL, {&bInter},
1224 "Set the next 8 options to interactive"},
1225 { "-ss", FALSE, etBOOL, {&bCysMan},
1226 "Interactive SS bridge selection" },
1227 { "-ter", FALSE, etBOOL, {&bTerMan},
1228 "Interactive termini selection, instead of charged (default)" },
1229 { "-lys", FALSE, etBOOL, {&bLysMan},
1230 "Interactive lysine selection, instead of charged" },
1231 { "-arg", FALSE, etBOOL, {&bArgMan},
1232 "Interactive arginine selection, instead of charged" },
1233 { "-asp", FALSE, etBOOL, {&bAspMan},
1234 "Interactive aspartic acid selection, instead of charged" },
1235 { "-glu", FALSE, etBOOL, {&bGluMan},
1236 "Interactive glutamic acid selection, instead of charged" },
1237 { "-gln", FALSE, etBOOL, {&bGlnMan},
1238 "Interactive glutamine selection, instead of neutral" },
1239 { "-his", FALSE, etBOOL, {&bHisMan},
1240 "Interactive histidine selection, instead of checking H-bonds" },
1241 { "-angle", FALSE, etREAL, {&angle},
1242 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1243 { "-dist", FALSE, etREAL, {&distance},
1244 "Maximum donor-acceptor distance for a H-bond (nm)" },
1245 { "-una", FALSE, etBOOL, {&bUnA},
1246 "Select aromatic rings with united CH atoms on phenylalanine, "
1247 "tryptophane and tyrosine" },
1248 { "-sort", FALSE, etBOOL, {&bSort},
1249 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1250 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1251 "Ignore hydrogen atoms that are in the coordinate file" },
1252 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1253 "Continue when atoms are missing, dangerous" },
1254 { "-v", FALSE, etBOOL, {&bVerbose},
1255 "Be slightly more verbose in messages" },
1256 { "-posrefc",FALSE, etREAL, {&posre_fc},
1257 "Force constant for position restraints" },
1258 { "-vsite", FALSE, etENUM, {vsitestr},
1259 "Convert atoms to virtual sites" },
1260 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1261 "Make hydrogen atoms heavy" },
1262 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1263 "Change the mass of hydrogens to 2 amu" },
1264 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1265 "Use charge groups in the [TT].rtp[tt] file" },
1266 { "-cmap", TRUE, etBOOL, {&bCmap},
1267 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1268 { "-renum", TRUE, etBOOL, {&bRenumRes},
1269 "Renumber the residues consecutively in the output" },
1270 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1271 "Use [TT].rtp[tt] entry names as residue names" }
1273 #define NPARGS asize(pa)
1275 CopyRight(stderr,argv[0]);
1276 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1279 /* Force field selection, interactive or direct */
1280 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1281 forcefield,sizeof(forcefield),
1282 ffdir,sizeof(ffdir));
1284 if (strlen(forcefield) > 0) {
1285 strcpy(ffname,forcefield);
1286 ffname[0] = toupper(ffname[0]);
1288 gmx_fatal(FARGS,"Empty forcefield string");
1291 printf("\nUsing the %s force field in directory %s\n\n",
1294 choose_watermodel(watstr[0],ffdir,&watermodel);
1297 /* if anything changes here, also change description of -inter */
1310 else if (bDeuterate)
1315 switch(vsitestr[0][0]) {
1316 case 'n': /* none */
1318 bVsiteAromatics=FALSE;
1320 case 'h': /* hydrogens */
1322 bVsiteAromatics=FALSE;
1324 case 'a': /* aromatics */
1326 bVsiteAromatics=TRUE;
1329 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1330 __FILE__,__LINE__,vsitestr[0]);
1333 /* Open the symbol table */
1334 open_symtab(&symtab);
1336 /* Residue type database */
1337 gmx_residuetype_init(&rt);
1339 /* Read residue renaming database(s), if present */
1340 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1344 for(i=0; i<nrrn; i++) {
1345 fp = fflib_open(rrn[i]);
1346 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1352 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1354 for(i=0;i<nrtprename;i++)
1356 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1358 /* Only add names if the 'standard' gromacs/iupac base name was found */
1361 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1362 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1363 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1364 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1369 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1370 strstr(watermodel,"4P"))) {
1372 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1373 strstr(watermodel,"5P"))) {
1379 aps = gmx_atomprop_init();
1380 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1381 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1385 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1387 printf("Analyzing pdb file\n");
1392 modify_chain_numbers(&pdba_all,chainsep[0]);
1396 this_atomname = NULL;
1398 this_resname = NULL;
1401 this_chainnumber = -1;
1402 this_chainstart = 0;
1403 /* Keep the compiler happy */
1404 prev_chainstart = 0;
1409 for (i=0; (i<natom); i++)
1411 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1413 prev_atomname = this_atomname;
1414 prev_atomnum = this_atomnum;
1415 prev_resname = this_resname;
1416 prev_resnum = this_resnum;
1417 prev_chainid = this_chainid;
1418 prev_chainnumber = this_chainnumber;
1421 prev_chainstart = this_chainstart;
1424 this_atomname = *pdba_all.atomname[i];
1425 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1426 this_resname = *ri->name;
1427 this_resnum = ri->nr;
1428 this_chainid = ri->chainid;
1429 this_chainnumber = ri->chainnum;
1431 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1432 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1434 this_chainstart = pdba_all.atom[i].resind;
1439 if(!strncmp(merge[0],"int",3))
1441 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1442 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1443 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1444 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1446 if(NULL==fgets(select,STRLEN-1,stdin))
1448 gmx_fatal(FARGS,"Error reading from stdin");
1450 bMerged = (select[0] == 'y');
1452 else if(!strncmp(merge[0],"all",3))
1460 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1461 pdba_all.atom[i].resind - prev_chainstart;
1462 pdb_ch[nch-1].nterpairs++;
1463 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1468 /* set natom for previous chain */
1471 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1478 /* check if chain identifier was used before */
1479 for (j=0; (j<nch); j++)
1481 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1483 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1484 "They will be treated as separate chains unless you reorder your file.\n",
1491 srenew(pdb_ch,maxch);
1493 pdb_ch[nch].chainid = ri->chainid;
1494 pdb_ch[nch].chainnum = ri->chainnum;
1495 pdb_ch[nch].start=i;
1496 pdb_ch[nch].bAllWat=bWat;
1498 pdb_ch[nch].nterpairs=0;
1500 pdb_ch[nch].nterpairs=1;
1501 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1502 /* modified [nch] to [0] below */
1503 pdb_ch[nch].chainstart[0]=0;
1509 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1511 /* set all the water blocks at the end of the chain */
1512 snew(swap_index,nch);
1514 for(i=0; i<nch; i++)
1515 if (!pdb_ch[i].bAllWat) {
1519 for(i=0; i<nch; i++)
1520 if (pdb_ch[i].bAllWat) {
1525 printf("Moved all the water blocks to the end\n");
1528 /* copy pdb data and x for all chains */
1529 for (i=0; (i<nch); i++) {
1531 chains[i].chainid = pdb_ch[si].chainid;
1532 chains[i].chainnum = pdb_ch[si].chainnum;
1533 chains[i].bAllWat = pdb_ch[si].bAllWat;
1534 chains[i].nterpairs = pdb_ch[si].nterpairs;
1535 chains[i].chainstart = pdb_ch[si].chainstart;
1536 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1537 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1538 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1539 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1541 snew(chains[i].pdba,1);
1542 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1543 snew(chains[i].x,chains[i].pdba->nr);
1544 for (j=0; j<chains[i].pdba->nr; j++) {
1545 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1546 snew(chains[i].pdba->atomname[j],1);
1547 *chains[i].pdba->atomname[j] =
1548 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1549 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1550 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1552 /* Re-index the residues assuming that the indices are continuous */
1553 k = chains[i].pdba->atom[0].resind;
1554 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1555 chains[i].pdba->nres = nres;
1556 for(j=0; j < chains[i].pdba->nr; j++) {
1557 chains[i].pdba->atom[j].resind -= k;
1559 srenew(chains[i].pdba->resinfo,nres);
1560 for(j=0; j<nres; j++) {
1561 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1562 snew(chains[i].pdba->resinfo[j].name,1);
1563 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1564 /* make all chain identifiers equal to that of the chain */
1565 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1570 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1573 printf("There are %d chains and %d blocks of water and "
1574 "%d residues with %d atoms\n",
1575 nch-nwaterchain,nwaterchain,
1576 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1578 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1579 for (i=0; (i<nch); i++)
1580 printf(" %d '%c' %5d %6d %s\n",
1581 i+1, chains[i].chainid ? chains[i].chainid:'-',
1582 chains[i].pdba->nres, chains[i].pdba->nr,
1583 chains[i].bAllWat ? "(only water)":"");
1586 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1588 /* Read atomtypes... */
1589 atype = read_atype(ffdir,&symtab);
1591 /* read residue database */
1592 printf("Reading residue database... (%s)\n",forcefield);
1593 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1596 for(i=0; i<nrtpf; i++) {
1597 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1602 /* Not correct with multiple rtp input files with different bonded types */
1603 fp=gmx_fio_fopen("new.rtp","w");
1604 print_resall(fp,nrtp,restp,atype);
1608 /* read hydrogen database */
1609 nah = read_h_db(ffdir,&ah);
1611 /* Read Termini database... */
1612 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1613 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1615 top_fn=ftp2fn(efTOP,NFILE,fnm);
1616 top_file=gmx_fio_fopen(top_fn,"w");
1618 sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
1620 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1627 for(chain=0; (chain<nch); chain++) {
1628 cc = &(chains[chain]);
1630 /* set pdba, natom and nres to the current chain */
1634 nres =cc->pdba->nres;
1636 if (cc->chainid && ( cc->chainid != ' ' ) )
1637 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1638 chain+1,cc->chainid,natom,nres);
1640 printf("Processing chain %d (%d atoms, %d residues)\n",
1641 chain+1,natom,nres);
1643 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1644 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1645 nrtprename,rtprename);
1647 cc->chainstart[cc->nterpairs] = pdba->nres;
1649 for(i=0; i<cc->nterpairs; i++)
1651 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1652 &(cc->r_start[j]),&(cc->r_end[j]),rt);
1654 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1660 if (cc->nterpairs == 0)
1662 printf("Problem with chain definition, or missing terminal residues.\n"
1663 "This chain does not appear to contain a recognized chain molecule.\n"
1664 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1667 /* Check for disulfides and other special bonds */
1668 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1670 if (nrtprename > 0) {
1671 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1677 sprintf(fn,"chain.pdb");
1679 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1681 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1685 for(i=0; i<cc->nterpairs; i++)
1689 * We first apply a filter so we only have the
1690 * termini that can be applied to the residue in question
1691 * (or a generic terminus if no-residue specific is available).
1693 /* First the N terminus */
1696 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1697 *pdba->resinfo[cc->r_start[i]].name,
1698 *pdba->resinfo[cc->r_start[i]].rtp,
1702 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1703 "is already in a terminus-specific form and skipping terminus selection.\n");
1708 if(bTerMan && ntdblist>1)
1710 sprintf(select,"Select start terminus type for %s-%d",
1711 *pdba->resinfo[cc->r_start[i]].name,
1712 pdba->resinfo[cc->r_start[i]].nr);
1713 cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
1717 cc->ntdb[i] = tdblist[0];
1720 printf("Start terminus %s-%d: %s\n",
1721 *pdba->resinfo[cc->r_start[i]].name,
1722 pdba->resinfo[cc->r_start[i]].nr,
1723 (cc->ntdb[i])->name);
1732 /* And the C terminus */
1735 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1736 *pdba->resinfo[cc->r_end[i]].name,
1737 *pdba->resinfo[cc->r_end[i]].rtp,
1741 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1742 "is already in a terminus-specific form and skipping terminus selection.\n");
1747 if(bTerMan && ntdblist>1)
1749 sprintf(select,"Select end terminus type for %s-%d",
1750 *pdba->resinfo[cc->r_end[i]].name,
1751 pdba->resinfo[cc->r_end[i]].nr);
1752 cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
1756 cc->ctdb[i] = tdblist[0];
1758 printf("End terminus %s-%d: %s\n",
1759 *pdba->resinfo[cc->r_end[i]].name,
1760 pdba->resinfo[cc->r_end[i]].nr,
1761 (cc->ctdb[i])->name);
1770 /* lookup hackblocks and rtp for all residues */
1771 get_hackblocks_rtp(&hb_chain, &restp_chain,
1772 nrtp, restp, pdba->nres, pdba->resinfo,
1773 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1774 /* ideally, now we would not need the rtp itself anymore, but do
1775 everything using the hb and restp arrays. Unfortunately, that
1776 requires some re-thinking of code in gen_vsite.c, which I won't
1777 do now :( AF 26-7-99 */
1779 rename_atoms(NULL,ffdir,
1780 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1782 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1785 block = new_blocka();
1787 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1788 natom,&pdba,&x,block,&gnames);
1789 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1790 if (ftp2bSet(efNDX,NFILE,fnm)) {
1792 fprintf(stderr,"WARNING: with the -remh option the generated "
1793 "index file (%s) might be useless\n"
1794 "(the index file is generated before hydrogens are added)",
1795 ftp2fn(efNDX,NFILE,fnm));
1797 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1799 for(i=0; i < block->nr; i++)
1804 fprintf(stderr,"WARNING: "
1805 "without sorting no check for duplicate atoms can be done\n");
1808 /* Generate Hydrogen atoms (and termini) in the sequence */
1809 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1810 natom=add_h(&pdba,&x,nah,ah,
1811 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1812 NULL,NULL,TRUE,FALSE);
1813 printf("Now there are %d residues with %d atoms\n",
1814 pdba->nres,pdba->nr);
1815 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1818 for(i=0; (i<natom); i++)
1819 fprintf(debug,"Res %s%d atom %d %s\n",
1820 *(pdba->resinfo[pdba->atom[i].resind].name),
1821 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1823 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1825 /* make up molecule name(s) */
1827 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1829 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1835 sprintf(molname,"Water");
1839 this_chainid = cc->chainid;
1841 /* Add the chain id if we have one */
1842 if(this_chainid != ' ')
1844 sprintf(buf,"_chain_%c",this_chainid);
1848 /* Check if there have been previous chains with the same id */
1850 for(k=0;k<chain;k++)
1852 if(cc->chainid == chains[k].chainid)
1857 /* Add the number for this chain identifier if there are multiple copies */
1861 sprintf(buf,"%d",nid_used+1);
1865 if(strlen(suffix)>0)
1867 sprintf(molname,"%s%s",p_restype,suffix);
1871 strcpy(molname,p_restype);
1875 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1877 strcpy(itp_fn,top_fn);
1878 printf("Chain time...\n");
1879 c=strrchr(itp_fn,'.');
1880 sprintf(c,"_%s.itp",molname);
1881 c=strrchr(posre_fn,'.');
1882 sprintf(c,"_%s.itp",molname);
1883 if (strcmp(itp_fn,posre_fn) == 0) {
1884 strcpy(buf_fn,posre_fn);
1885 c = strrchr(buf_fn,'.');
1887 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1891 srenew(incls,nincl);
1892 incls[nincl-1]=strdup(itp_fn);
1893 itp_file=gmx_fio_fopen(itp_fn,"w");
1897 srenew(mols,nmol+1);
1899 mols[nmol].name = strdup("SOL");
1900 mols[nmol].nr = pdba->nres;
1902 mols[nmol].name = strdup(molname);
1908 print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
1918 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1920 restp_chain,hb_chain,
1921 cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
1922 bVsites,bVsiteAromatics,forcefield,ffdir,
1923 mHmult,nssbonds,ssbonds,
1924 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1925 bRenumRes,bRTPresname);
1928 write_posres(posre_fn,pdba,posre_fc);
1931 gmx_fio_fclose(itp_file);
1933 /* pdba and natom have been reassigned somewhere so: */
1938 if (cc->chainid == ' ')
1939 sprintf(fn,"chain.pdb");
1941 sprintf(fn,"chain_%c.pdb",cc->chainid);
1942 cool_quote(quote,255,NULL);
1943 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1947 if (watermodel == NULL) {
1948 for(chain=0; chain<nch; chain++) {
1949 if (chains[chain].bAllWat) {
1950 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1954 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1955 if (!fflib_fexist(buf_fn)) {
1956 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1961 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1962 gmx_fio_fclose(top_file);
1964 gmx_residuetype_destroy(rt);
1966 /* now merge all chains back together */
1969 for (i=0; (i<nch); i++) {
1970 natom+=chains[i].pdba->nr;
1971 nres+=chains[i].pdba->nres;
1974 init_t_atoms(atoms,natom,FALSE);
1975 for(i=0; i < atoms->nres; i++)
1976 sfree(atoms->resinfo[i].name);
1977 sfree(atoms->resinfo);
1979 snew(atoms->resinfo,nres);
1983 for (i=0; (i<nch); i++) {
1985 printf("Including chain %d in system: %d atoms %d residues\n",
1986 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1987 for (j=0; (j<chains[i].pdba->nr); j++) {
1988 atoms->atom[k]=chains[i].pdba->atom[j];
1989 atoms->atom[k].resind += l; /* l is processed nr of residues */
1990 atoms->atomname[k]=chains[i].pdba->atomname[j];
1991 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1992 copy_rvec(chains[i].x[j],x[k]);
1995 for (j=0; (j<chains[i].pdba->nres); j++) {
1996 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1998 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2005 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
2006 print_sums(atoms, TRUE);
2009 fprintf(stderr,"\nWriting coordinate file...\n");
2010 clear_rvec(box_space);
2012 gen_box(0,atoms->nr,x,box,box_space,FALSE);
2013 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
2015 printf("\t\t--------- PLEASE NOTE ------------\n");
2016 printf("You have successfully generated a topology from: %s.\n",
2017 opt2fn("-f",NFILE,fnm));
2018 if (watermodel != NULL) {
2019 printf("The %s force field and the %s water model are used.\n",
2022 printf("The %s force field is used.\n",
2025 printf("\t\t--------- ETON ESAELP ------------\n");