1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
85 static const char *res2bb_notermini(const char *name,
86 int nrr,const rtprename_t *rr)
88 /* NOTE: This function returns the main building block name,
89 * it does not take terminal renaming into account.
94 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
98 return (i < nrr ? rr[i].main : name);
101 static const char *select_res(int nr,int resnr,
102 const char *name[],const char *expl[],
104 int nrr,const rtprename_t *rr)
108 printf("Which %s type do you want for residue %d\n",title,resnr+1);
109 for(sel=0; (sel < nr); sel++) {
110 printf("%d. %s (%s)\n",
111 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
113 printf("\nType a number:"); fflush(stdout);
115 if (scanf("%d",&sel) != 1)
116 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
121 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
123 enum { easp, easpH, easpNR };
124 const char *lh[easpNR] = { "ASP", "ASPH" };
125 const char *expl[easpNR] = {
126 "Not protonated (charge -1)",
127 "Protonated (charge 0)"
130 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
133 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
135 enum { eglu, egluH, egluNR };
136 const char *lh[egluNR] = { "GLU", "GLUH" };
137 const char *expl[egluNR] = {
138 "Not protonated (charge -1)",
139 "Protonated (charge 0)"
142 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
145 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
147 enum { egln, eglnH, eglnNR };
148 const char *lh[eglnNR] = { "GLN", "QLN" };
149 const char *expl[eglnNR] = {
150 "Not protonated (charge 0)",
151 "Protonated (charge +1)"
154 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
157 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
159 enum { elys, elysH, elysNR };
160 const char *lh[elysNR] = { "LYSN", "LYS" };
161 const char *expl[elysNR] = {
162 "Not protonated (charge 0)",
163 "Protonated (charge +1)"
166 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
169 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
171 enum { earg, eargH, eargNR };
172 const char *lh[eargNR] = { "ARGN", "ARG" };
173 const char *expl[eargNR] = {
174 "Not protonated (charge 0)",
175 "Protonated (charge +1)"
178 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
181 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
183 const char *expl[ehisNR] = {
190 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
193 static void read_rtprename(const char *fname,FILE *fp,
194 int *nrtprename,rtprename_t **rtprename)
196 char line[STRLEN],buf[STRLEN];
205 while(get_a_line(fp,line,STRLEN)) {
207 nc = sscanf(line,"%s %s %s %s %s %s",
208 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
210 if (nc != 2 && nc != 5) {
211 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
214 } else if (nc != ncol) {
215 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
219 /* This file does not have special termini names, copy them from main */
220 strcpy(rr[n].nter,rr[n].main);
221 strcpy(rr[n].cter,rr[n].main);
222 strcpy(rr[n].bter,rr[n].main);
232 static char *search_resrename(int nrr,rtprename_t *rr,
234 gmx_bool bStart,gmx_bool bEnd,
235 gmx_bool bCompareFFRTPname)
243 while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx) != 0) ||
244 ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
249 /* If found in the database, rename this residue's rtp building block,
250 * otherwise keep the old name.
272 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
280 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
281 int nrr,rtprename_t *rr,t_symtab *symtab,
285 gmx_bool bStart,bEnd;
287 gmx_bool bFFRTPTERRNM;
289 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
291 for(r=0; r<pdba->nres; r++)
295 for(j=0; j<nterpairs; j++)
302 for(j=0; j<nterpairs; j++)
310 nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
312 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
314 /* This is a terminal residue, but the residue name,
315 * currently stored in .rtp, is not a standard residue name,
316 * but probably a force field specific rtp name.
317 * Check if we need to rename it because it is terminal.
319 nn = search_resrename(nrr,rr,
320 *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
323 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
327 printf("Changing rtp entry of residue %d %s to '%s'\n",
328 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
330 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
335 static void pdbres_to_gmxrtp(t_atoms *pdba)
339 for(i=0; (i<pdba->nres); i++)
341 if (pdba->resinfo[i].rtp == NULL)
343 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
348 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
349 gmx_bool bFullCompare,t_symtab *symtab)
354 for(i=0; (i<pdba->nres); i++)
356 resnm = *pdba->resinfo[i].name;
357 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
358 (!bFullCompare && strstr(resnm,oldnm) != NULL))
360 /* Rename the residue name (not the rtp name) */
361 pdba->resinfo[i].name = put_symtab(symtab,newnm);
366 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
367 gmx_bool bFullCompare,t_symtab *symtab)
372 for(i=0; (i<pdba->nres); i++)
374 /* We have not set the rtp name yes, use the residue name */
375 bbnm = *pdba->resinfo[i].name;
376 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
377 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
379 /* Change the rtp builing block name */
380 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
385 static void rename_bbint(t_atoms *pdba,const char *oldnm,
386 const char *gettp(int,int,const rtprename_t *),
387 gmx_bool bFullCompare,
389 int nrr,const rtprename_t *rr)
395 for(i=0; i<pdba->nres; i++)
397 /* We have not set the rtp name yes, use the residue name */
398 bbnm = *pdba->resinfo[i].name;
399 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
400 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
402 ptr = gettp(i,nrr,rr);
403 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
408 static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
414 ftp = fn2ftp(filename);
415 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
416 fprintf(stderr,"No occupancies in %s\n",filename);
418 for(i=0; (i<atoms->nr); i++) {
419 if (atoms->pdbinfo[i].occup != 1) {
421 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
422 *atoms->resinfo[atoms->atom[i].resind].name,
423 atoms->resinfo[ atoms->atom[i].resind].nr,
425 atoms->pdbinfo[i].occup);
426 if (atoms->pdbinfo[i].occup == 0)
432 if (nzero == atoms->nr) {
433 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
434 } else if ((nzero > 0) || (nnotone > 0)) {
437 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
438 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
440 nzero,nnotone,atoms->nr);
442 fprintf(stderr,"All occupancies are one\n");
447 void write_posres(char *fn,t_atoms *pdba,real fc)
452 fp=gmx_fio_fopen(fn,"w");
454 "; In this topology include file, you will find position restraint\n"
455 "; entries for all the heavy atoms in your original pdb file.\n"
456 "; This means that all the protons which were added by pdb2gmx are\n"
457 "; not restrained.\n"
459 "[ position_restraints ]\n"
460 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
462 for(i=0; (i<pdba->nr); i++) {
463 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
464 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
469 static int read_pdball(const char *inf, const char *outf,char *title,
470 t_atoms *atoms, rvec **x,
471 int *ePBC,matrix box, gmx_bool bRemoveH,
472 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
473 gmx_atomprop_t aps,gmx_bool bVerbose)
474 /* Read a pdb file. (containing proteins) */
476 int natom,new_natom,i;
479 printf("Reading %s...\n",inf);
480 get_stx_coordnum(inf,&natom);
481 init_t_atoms(atoms,natom,TRUE);
483 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
484 if (fn2ftp(inf) == efPDB)
485 get_pdb_atomnumber(atoms,aps);
488 for(i=0; i<atoms->nr; i++)
489 if (!is_hydrogen(*atoms->atomname[i])) {
490 atoms->atom[new_natom]=atoms->atom[i];
491 atoms->atomname[new_natom]=atoms->atomname[i];
492 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
493 copy_rvec((*x)[i],(*x)[new_natom]);
501 if (title && title[0])
502 printf(" '%s',",title);
503 printf(" %d atoms\n",natom);
505 /* Rename residues */
506 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
507 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
508 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
510 rename_atoms("xlateat.dat",NULL,
511 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
517 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
522 void process_chain(t_atoms *pdba, rvec *x,
523 gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
524 gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
525 gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
526 real angle,real distance,t_symtab *symtab,
527 int nrr,const rtprename_t *rr)
529 /* Rename aromatics, lys, asp and histidine */
530 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
531 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
532 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
534 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
536 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
538 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
540 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
542 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
544 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
546 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
549 set_histp(pdba,x,angle,distance);
551 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
553 /* Initialize the rtp builing block names with the residue names
554 * for the residues that have not been processed above.
556 pdbres_to_gmxrtp(pdba);
558 /* Now we have all rtp names set.
559 * The rtp names will conform to Gromacs naming,
560 * unless the input pdb file contained one or more force field specific
561 * rtp names as residue names.
565 /* struct for sorting the atoms from the pdb file */
567 int resnr; /* residue number */
568 int j; /* database order index */
569 int index; /* original atom number */
570 char anm1; /* second letter of atom name */
571 char altloc; /* alternate location indicator */
574 int pdbicomp(const void *a,const void *b)
582 d = (pa->resnr - pb->resnr);
586 d = (pa->anm1 - pb->anm1);
588 d = (pa->altloc - pb->altloc);
595 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
596 int natoms,t_atoms **pdbaptr,rvec **x,
597 t_blocka *block,char ***gnames)
599 t_atoms *pdba,*pdbnew;
614 for(i=0; i<natoms; i++)
616 atomnm = *pdba->atomname[i];
617 rptr = &restp[pdba->atom[i].resind];
618 for(j=0; (j<rptr->natom); j++)
620 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
630 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
631 "while sorting atoms.\n%s",atomnm,
632 *pdba->resinfo[pdba->atom[i].resind].name,
633 pdba->resinfo[pdba->atom[i].resind].nr,
636 is_hydrogen(atomnm) ?
637 "\nFor a hydrogen, this can be a different protonation state, or it\n"
638 "might have had a different number in the PDB file and was rebuilt\n"
639 "(it might for instance have been H3, and we only expected H1 & H2).\n"
640 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
641 "Remove this hydrogen or choose a different protonation state to solve it.\n"
642 "Option -ignh will ignore all hydrogens in the input." : ".");
643 gmx_fatal(FARGS,buf);
645 /* make shadow array to be sorted into indexgroup */
646 pdbi[i].resnr = pdba->atom[i].resind;
649 pdbi[i].anm1 = atomnm[1];
650 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
652 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
654 /* pdba is sorted in pdbnew using the pdbi index */
657 init_t_atoms(pdbnew,natoms,TRUE);
660 pdbnew->nres=pdba->nres;
661 sfree(pdbnew->resinfo);
662 pdbnew->resinfo=pdba->resinfo;
663 for (i=0; i<natoms; i++) {
664 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
665 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
666 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
667 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
668 /* make indexgroup in block */
672 sfree(pdba->atomname);
674 sfree(pdba->pdbinfo);
677 /* copy the sorted pdbnew back to pdba */
680 add_grp(block, gnames, natoms, a, "prot_sort");
686 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
688 int i,j,oldnatoms,ndel;
691 printf("Checking for duplicate atoms....\n");
692 oldnatoms = pdba->nr;
694 /* NOTE: pdba->nr is modified inside the loop */
695 for(i=1; (i < pdba->nr); i++) {
696 /* compare 'i' and 'i-1', throw away 'i' if they are identical
697 this is a 'while' because multiple alternate locations can be present */
698 while ( (i < pdba->nr) &&
699 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
700 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
703 ri = &pdba->resinfo[pdba->atom[i].resind];
704 printf("deleting duplicate atom %4s %s%4d%c",
705 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
706 if (ri->chainid && (ri->chainid != ' '))
707 printf(" ch %c", ri->chainid);
709 if (pdba->pdbinfo[i].atomnr)
710 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
711 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
712 printf(" altloc %c",pdba->pdbinfo[i].altloc);
717 /* We can not free, since it might be in the symtab */
718 /* sfree(pdba->atomname[i]); */
719 for (j=i; j < pdba->nr; j++) {
720 pdba->atom[j] = pdba->atom[j+1];
721 pdba->atomname[j] = pdba->atomname[j+1];
722 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
723 copy_rvec(x[j+1],x[j]);
725 srenew(pdba->atom, pdba->nr);
726 /* srenew(pdba->atomname, pdba->nr); */
727 srenew(pdba->pdbinfo, pdba->nr);
730 if (pdba->nr != oldnatoms)
731 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
736 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
739 const char *p_startrestype;
740 const char *p_restype;
741 int nstartwarn,nendwarn;
749 /* Find the starting terminus (typially N or 5') */
750 for(i=r0;i<r1 && *r_start==-1;i++)
752 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
753 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
755 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
762 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
766 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
774 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
775 for(i=*r_start;i<r1;i++)
777 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
778 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
786 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
787 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
788 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
792 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
801 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
807 modify_chain_numbers(t_atoms * pdba,
808 const char * chainsep)
811 char old_prev_chainid;
812 char old_this_chainid;
813 int old_prev_chainnum;
814 int old_this_chainnum;
820 const char * prev_atomname;
821 const char * this_atomname;
822 const char * prev_resname;
823 const char * this_resname;
828 int prev_chainnumber;
829 int this_chainnumber;
841 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
843 /* Be a bit flexible to catch typos */
844 if (!strncmp(chainsep,"id_o",4))
846 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
847 splitting = SPLIT_ID_OR_TER;
848 printf("Splitting chemical chains based on TER records or chain id changing.\n");
850 else if (!strncmp(chainsep,"int",3))
852 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
853 splitting = SPLIT_INTERACTIVE;
854 printf("Splitting chemical chains interactively.\n");
856 else if (!strncmp(chainsep,"id_a",4))
858 splitting = SPLIT_ID_AND_TER;
859 printf("Splitting chemical chains based on TER records and chain id changing.\n");
861 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
863 splitting = SPLIT_ID_ONLY;
864 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
866 else if (chainsep[0]=='t')
868 splitting = SPLIT_TER_ONLY;
869 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
873 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
876 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
878 old_prev_chainid = '?';
879 old_prev_chainnum = -1;
882 this_atomname = NULL;
887 this_chainnumber = -1;
889 for(i=0;i<pdba->nres;i++)
891 ri = &pdba->resinfo[i];
892 old_this_chainid = ri->chainid;
893 old_this_chainnum = ri->chainnum;
895 prev_atomname = this_atomname;
896 prev_atomnum = this_atomnum;
897 prev_resname = this_resname;
898 prev_resnum = this_resnum;
899 prev_chainid = this_chainid;
900 prev_chainnumber = this_chainnumber;
902 this_atomname = *(pdba->atomname[i]);
903 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
904 this_resname = *ri->name;
905 this_resnum = ri->nr;
906 this_chainid = ri->chainid;
907 this_chainnumber = ri->chainnum;
911 case SPLIT_ID_OR_TER:
912 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
918 case SPLIT_ID_AND_TER:
919 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
926 if(old_this_chainid != old_prev_chainid)
933 if(old_this_chainnum != old_prev_chainnum)
938 case SPLIT_INTERACTIVE:
939 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
943 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
944 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
945 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
946 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
948 if(NULL==fgets(select,STRLEN-1,stdin))
950 gmx_fatal(FARGS,"Error reading from stdin");
953 if(i==0 || select[0] == 'y')
960 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
963 old_prev_chainid = old_this_chainid;
964 old_prev_chainnum = old_this_chainnum;
966 ri->chainnum = new_chainnum;
995 int cmain(int argc, char *argv[])
997 const char *desc[] = {
998 "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
999 "some database files, adds hydrogens to the molecules and generates",
1000 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1001 "and a topology in GROMACS format.",
1002 "These files can subsequently be processed to generate a run input file.",
1004 "[TT]pdb2gmx[tt] will search for force fields by looking for",
1005 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1006 "of the current working directory and of the GROMACS library directory",
1007 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1009 "By default the forcefield selection is interactive,",
1010 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1011 "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
1012 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1014 "After choosing a force field, all files will be read only from",
1015 "the corresponding force field directory.",
1016 "If you want to modify or add a residue types, you can copy the force",
1017 "field directory from the GROMACS library directory to your current",
1018 "working directory. If you want to add new protein residue types,",
1019 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1020 "or copy the whole library directory to a local directory and set",
1021 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1022 "Check Chapter 5 of the manual for more information about file formats.",
1025 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1026 "need not necessarily contain a protein structure. Every kind of",
1027 "molecule for which there is support in the database can be converted.",
1028 "If there is no support in the database, you can add it yourself.[PAR]",
1030 "The program has limited intelligence, it reads a number of database",
1031 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1032 "if necessary this can be done manually. The program can prompt the",
1033 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1034 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1035 "protonated (three protons, default), for Asp and Glu unprotonated",
1036 "(default) or protonated, for His the proton can be either on ND1,",
1037 "on NE2 or on both. By default these selections are done automatically.",
1038 "For His, this is based on an optimal hydrogen bonding",
1039 "conformation. Hydrogen bonds are defined based on a simple geometric",
1040 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1041 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1042 "[TT]-dist[tt] respectively.[PAR]",
1044 "The protonation state of N- and C-termini can be chosen interactively",
1045 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1046 "respectively. Some force fields support zwitterionic forms for chains of",
1047 "one residue, but for polypeptides these options should NOT be selected.",
1048 "The AMBER force fields have unique forms for the terminal residues,",
1049 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1050 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1051 "respectively to use these forms, making sure you preserve the format",
1052 "of the coordinate file. Alternatively, use named terminating residues",
1053 "(e.g. ACE, NME).[PAR]",
1055 "The separation of chains is not entirely trivial since the markup",
1056 "in user-generated PDB files frequently varies and sometimes it",
1057 "is desirable to merge entries across a TER record, for instance",
1058 "if you want a disulfide bridge or distance restraints between",
1059 "two protein chains or if you have a HEME group bound to a protein.",
1060 "In such cases multiple chains should be contained in a single",
1061 "[TT]moleculetype[tt] definition.",
1062 "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
1063 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1064 "start, and termini added when applicable. This can be done based on the",
1065 "existence of TER records, when the chain id changes, or combinations of either",
1066 "or both of these. You can also do the selection fully interactively.",
1067 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1068 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1069 "This can be turned off (no merging), all non-water chains can be merged into a",
1070 "single molecule, or the selection can be done interactively.[PAR]",
1072 "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
1073 "If any of the occupancies are not one, indicating that the atom is",
1074 "not resolved well in the structure, a warning message is issued.",
1075 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1076 "all occupancy fields may be zero. Either way, it is up to the user",
1077 "to verify the correctness of the input data (read the article!).[PAR]",
1079 "During processing the atoms will be reordered according to GROMACS",
1080 "conventions. With [TT]-n[tt] an index file can be generated that",
1081 "contains one group reordered in the same way. This allows you to",
1082 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1083 "one limitation: reordering is done after the hydrogens are stripped",
1084 "from the input and before new hydrogens are added. This means that",
1085 "you should not use [TT]-ignh[tt].[PAR]",
1087 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1088 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1089 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1092 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1093 "motions. Angular and out-of-plane motions can be removed by changing",
1094 "hydrogens into virtual sites and fixing angles, which fixes their",
1095 "position relative to neighboring atoms. Additionally, all atoms in the",
1096 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1097 "can be converted into virtual sites, eliminating the fast improper dihedral",
1098 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1099 "atoms are also converted to virtual sites. The mass of all atoms that are",
1100 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1101 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1102 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1103 "done for water hydrogens to slow down the rotational motion of water.",
1104 "The increase in mass of the hydrogens is subtracted from the bonded",
1105 "(heavy) atom so that the total mass of the system remains the same."
1109 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
1111 t_atoms pdba_all,*pdba;
1115 int chain,nch,maxch,nwaterchain;
1117 t_chain *chains,*cc;
1118 char select[STRLEN];
1131 gpp_atomtype_t atype;
1132 gmx_residuetype_t rt;
1134 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1135 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1136 char *c,forcefield[STRLEN],ffdir[STRLEN];
1137 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1146 rtprename_t *rtprename=NULL;
1147 int nah,nNtdb,nCtdb,ntdblist;
1148 t_hackblock *ntdb,*ctdb,**tdblist;
1152 gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
1154 t_hackblock *hb_chain;
1155 t_restp *restp_chain;
1157 const char *p_restype;
1161 const char * prev_atomname;
1162 const char * this_atomname;
1163 const char * prev_resname;
1164 const char * this_resname;
1169 int prev_chainnumber;
1170 int this_chainnumber;
1172 int this_chainstart;
1173 int prev_chainstart;
1180 { efSTX, "-f", "eiwit.pdb", ffREAD },
1181 { efSTO, "-o", "conf", ffWRITE },
1182 { efTOP, NULL, NULL, ffWRITE },
1183 { efITP, "-i", "posre", ffWRITE },
1184 { efNDX, "-n", "clean", ffOPTWR },
1185 { efSTO, "-q", "clean.pdb", ffOPTWR }
1187 #define NFILE asize(fnm)
1190 /* Command line arguments must be static */
1191 static gmx_bool bNewRTP=FALSE;
1192 static gmx_bool bInter=FALSE, bCysMan=FALSE;
1193 static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1194 static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
1195 static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1196 static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1197 static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1198 static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
1199 static real angle=135.0, distance=0.3,posre_fc=1000;
1200 static real long_bond_dist=0.25, short_bond_dist=0.05;
1201 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1202 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1203 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1204 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1205 static const char *ff = "select";
1208 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1209 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1210 { "-lb", FALSE, etREAL, {&long_bond_dist},
1211 "HIDDENLong bond warning distance" },
1212 { "-sb", FALSE, etREAL, {&short_bond_dist},
1213 "HIDDENShort bond warning distance" },
1214 { "-chainsep", FALSE, etENUM, {chainsep},
1215 "Condition in PDB files when a new chain should be started (adding termini)" },
1216 { "-merge", FALSE, etENUM, {&merge},
1217 "Merge multiple chains into a single [moleculetype]" },
1218 { "-ff", FALSE, etSTR, {&ff},
1219 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1220 { "-water", FALSE, etENUM, {watstr},
1221 "Water model to use" },
1222 { "-inter", FALSE, etBOOL, {&bInter},
1223 "Set the next 8 options to interactive"},
1224 { "-ss", FALSE, etBOOL, {&bCysMan},
1225 "Interactive SS bridge selection" },
1226 { "-ter", FALSE, etBOOL, {&bTerMan},
1227 "Interactive termini selection, instead of charged (default)" },
1228 { "-lys", FALSE, etBOOL, {&bLysMan},
1229 "Interactive lysine selection, instead of charged" },
1230 { "-arg", FALSE, etBOOL, {&bArgMan},
1231 "Interactive arginine selection, instead of charged" },
1232 { "-asp", FALSE, etBOOL, {&bAspMan},
1233 "Interactive aspartic acid selection, instead of charged" },
1234 { "-glu", FALSE, etBOOL, {&bGluMan},
1235 "Interactive glutamic acid selection, instead of charged" },
1236 { "-gln", FALSE, etBOOL, {&bGlnMan},
1237 "Interactive glutamine selection, instead of neutral" },
1238 { "-his", FALSE, etBOOL, {&bHisMan},
1239 "Interactive histidine selection, instead of checking H-bonds" },
1240 { "-angle", FALSE, etREAL, {&angle},
1241 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1242 { "-dist", FALSE, etREAL, {&distance},
1243 "Maximum donor-acceptor distance for a H-bond (nm)" },
1244 { "-una", FALSE, etBOOL, {&bUnA},
1245 "Select aromatic rings with united CH atoms on phenylalanine, "
1246 "tryptophane and tyrosine" },
1247 { "-sort", FALSE, etBOOL, {&bSort},
1248 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1249 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1250 "Ignore hydrogen atoms that are in the coordinate file" },
1251 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1252 "Continue when atoms are missing, dangerous" },
1253 { "-v", FALSE, etBOOL, {&bVerbose},
1254 "Be slightly more verbose in messages" },
1255 { "-posrefc",FALSE, etREAL, {&posre_fc},
1256 "Force constant for position restraints" },
1257 { "-vsite", FALSE, etENUM, {vsitestr},
1258 "Convert atoms to virtual sites" },
1259 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1260 "Make hydrogen atoms heavy" },
1261 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1262 "Change the mass of hydrogens to 2 amu" },
1263 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1264 "Use charge groups in the [TT].rtp[tt] file" },
1265 { "-cmap", TRUE, etBOOL, {&bCmap},
1266 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1267 { "-renum", TRUE, etBOOL, {&bRenumRes},
1268 "Renumber the residues consecutively in the output" },
1269 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1270 "Use [TT].rtp[tt] entry names as residue names" }
1272 #define NPARGS asize(pa)
1274 CopyRight(stderr,argv[0]);
1275 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1278 /* Force field selection, interactive or direct */
1279 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1280 forcefield,sizeof(forcefield),
1281 ffdir,sizeof(ffdir));
1283 if (strlen(forcefield) > 0) {
1284 strcpy(ffname,forcefield);
1285 ffname[0] = toupper(ffname[0]);
1287 gmx_fatal(FARGS,"Empty forcefield string");
1290 printf("\nUsing the %s force field in directory %s\n\n",
1293 choose_watermodel(watstr[0],ffdir,&watermodel);
1296 /* if anything changes here, also change description of -inter */
1309 else if (bDeuterate)
1314 switch(vsitestr[0][0]) {
1315 case 'n': /* none */
1317 bVsiteAromatics=FALSE;
1319 case 'h': /* hydrogens */
1321 bVsiteAromatics=FALSE;
1323 case 'a': /* aromatics */
1325 bVsiteAromatics=TRUE;
1328 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1329 __FILE__,__LINE__,vsitestr[0]);
1332 /* Open the symbol table */
1333 open_symtab(&symtab);
1335 /* Residue type database */
1336 gmx_residuetype_init(&rt);
1338 /* Read residue renaming database(s), if present */
1339 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1343 for(i=0; i<nrrn; i++) {
1344 fp = fflib_open(rrn[i]);
1345 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1351 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1353 for(i=0;i<nrtprename;i++)
1355 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1357 /* Only add names if the 'standard' gromacs/iupac base name was found */
1360 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1361 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1362 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1363 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1368 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1369 strstr(watermodel,"4P"))) {
1371 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1372 strstr(watermodel,"5P"))) {
1378 aps = gmx_atomprop_init();
1379 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1380 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1384 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1386 printf("Analyzing pdb file\n");
1391 modify_chain_numbers(&pdba_all,chainsep[0]);
1395 this_atomname = NULL;
1397 this_resname = NULL;
1400 this_chainnumber = -1;
1401 this_chainstart = 0;
1402 /* Keep the compiler happy */
1403 prev_chainstart = 0;
1408 for (i=0; (i<natom); i++)
1410 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1412 prev_atomname = this_atomname;
1413 prev_atomnum = this_atomnum;
1414 prev_resname = this_resname;
1415 prev_resnum = this_resnum;
1416 prev_chainid = this_chainid;
1417 prev_chainnumber = this_chainnumber;
1420 prev_chainstart = this_chainstart;
1423 this_atomname = *pdba_all.atomname[i];
1424 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1425 this_resname = *ri->name;
1426 this_resnum = ri->nr;
1427 this_chainid = ri->chainid;
1428 this_chainnumber = ri->chainnum;
1430 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1431 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1433 this_chainstart = pdba_all.atom[i].resind;
1438 if(!strncmp(merge[0],"int",3))
1440 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1441 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1442 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1443 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1445 if(NULL==fgets(select,STRLEN-1,stdin))
1447 gmx_fatal(FARGS,"Error reading from stdin");
1449 bMerged = (select[0] == 'y');
1451 else if(!strncmp(merge[0],"all",3))
1459 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1460 pdba_all.atom[i].resind - prev_chainstart;
1461 pdb_ch[nch-1].nterpairs++;
1462 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1467 /* set natom for previous chain */
1470 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1477 /* check if chain identifier was used before */
1478 for (j=0; (j<nch); j++)
1480 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1482 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1483 "They will be treated as separate chains unless you reorder your file.\n",
1490 srenew(pdb_ch,maxch);
1492 pdb_ch[nch].chainid = ri->chainid;
1493 pdb_ch[nch].chainnum = ri->chainnum;
1494 pdb_ch[nch].start=i;
1495 pdb_ch[nch].bAllWat=bWat;
1497 pdb_ch[nch].nterpairs=0;
1499 pdb_ch[nch].nterpairs=1;
1500 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1501 /* modified [nch] to [0] below */
1502 pdb_ch[nch].chainstart[0]=0;
1508 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1510 /* set all the water blocks at the end of the chain */
1511 snew(swap_index,nch);
1513 for(i=0; i<nch; i++)
1514 if (!pdb_ch[i].bAllWat) {
1518 for(i=0; i<nch; i++)
1519 if (pdb_ch[i].bAllWat) {
1524 printf("Moved all the water blocks to the end\n");
1527 /* copy pdb data and x for all chains */
1528 for (i=0; (i<nch); i++) {
1530 chains[i].chainid = pdb_ch[si].chainid;
1531 chains[i].chainnum = pdb_ch[si].chainnum;
1532 chains[i].bAllWat = pdb_ch[si].bAllWat;
1533 chains[i].nterpairs = pdb_ch[si].nterpairs;
1534 chains[i].chainstart = pdb_ch[si].chainstart;
1535 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1536 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1537 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1538 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1540 snew(chains[i].pdba,1);
1541 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1542 snew(chains[i].x,chains[i].pdba->nr);
1543 for (j=0; j<chains[i].pdba->nr; j++) {
1544 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1545 snew(chains[i].pdba->atomname[j],1);
1546 *chains[i].pdba->atomname[j] =
1547 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1548 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1549 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1551 /* Re-index the residues assuming that the indices are continuous */
1552 k = chains[i].pdba->atom[0].resind;
1553 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1554 chains[i].pdba->nres = nres;
1555 for(j=0; j < chains[i].pdba->nr; j++) {
1556 chains[i].pdba->atom[j].resind -= k;
1558 srenew(chains[i].pdba->resinfo,nres);
1559 for(j=0; j<nres; j++) {
1560 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1561 snew(chains[i].pdba->resinfo[j].name,1);
1562 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1563 /* make all chain identifiers equal to that of the chain */
1564 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1569 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1572 printf("There are %d chains and %d blocks of water and "
1573 "%d residues with %d atoms\n",
1574 nch-nwaterchain,nwaterchain,
1575 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1577 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1578 for (i=0; (i<nch); i++)
1579 printf(" %d '%c' %5d %6d %s\n",
1580 i+1, chains[i].chainid ? chains[i].chainid:'-',
1581 chains[i].pdba->nres, chains[i].pdba->nr,
1582 chains[i].bAllWat ? "(only water)":"");
1585 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1587 /* Read atomtypes... */
1588 atype = read_atype(ffdir,&symtab);
1590 /* read residue database */
1591 printf("Reading residue database... (%s)\n",forcefield);
1592 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1595 for(i=0; i<nrtpf; i++) {
1596 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1601 /* Not correct with multiple rtp input files with different bonded types */
1602 fp=gmx_fio_fopen("new.rtp","w");
1603 print_resall(fp,nrtp,restp,atype);
1607 /* read hydrogen database */
1608 nah = read_h_db(ffdir,&ah);
1610 /* Read Termini database... */
1611 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1612 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1614 top_fn=ftp2fn(efTOP,NFILE,fnm);
1615 top_file=gmx_fio_fopen(top_fn,"w");
1617 sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
1619 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1626 for(chain=0; (chain<nch); chain++) {
1627 cc = &(chains[chain]);
1629 /* set pdba, natom and nres to the current chain */
1633 nres =cc->pdba->nres;
1635 if (cc->chainid && ( cc->chainid != ' ' ) )
1636 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1637 chain+1,cc->chainid,natom,nres);
1639 printf("Processing chain %d (%d atoms, %d residues)\n",
1640 chain+1,natom,nres);
1642 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1643 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1644 nrtprename,rtprename);
1646 cc->chainstart[cc->nterpairs] = pdba->nres;
1648 for(i=0; i<cc->nterpairs; i++)
1650 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1651 &(cc->r_start[j]),&(cc->r_end[j]),rt);
1653 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1659 if (cc->nterpairs == 0)
1661 printf("Problem with chain definition, or missing terminal residues.\n"
1662 "This chain does not appear to contain a recognized chain molecule.\n"
1663 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1666 /* Check for disulfides and other special bonds */
1667 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1669 if (nrtprename > 0) {
1670 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1676 sprintf(fn,"chain.pdb");
1678 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1680 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1684 for(i=0; i<cc->nterpairs; i++)
1688 * We first apply a filter so we only have the
1689 * termini that can be applied to the residue in question
1690 * (or a generic terminus if no-residue specific is available).
1692 /* First the N terminus */
1695 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1696 *pdba->resinfo[cc->r_start[i]].name,
1697 *pdba->resinfo[cc->r_start[i]].rtp,
1701 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1702 "is already in a terminus-specific form and skipping terminus selection.\n");
1707 if(bTerMan && ntdblist>1)
1709 sprintf(select,"Select start terminus type for %s-%d",
1710 *pdba->resinfo[cc->r_start[i]].name,
1711 pdba->resinfo[cc->r_start[i]].nr);
1712 cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
1716 cc->ntdb[i] = tdblist[0];
1719 printf("Start terminus %s-%d: %s\n",
1720 *pdba->resinfo[cc->r_start[i]].name,
1721 pdba->resinfo[cc->r_start[i]].nr,
1722 (cc->ntdb[i])->name);
1731 /* And the C terminus */
1734 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1735 *pdba->resinfo[cc->r_end[i]].name,
1736 *pdba->resinfo[cc->r_end[i]].rtp,
1740 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1741 "is already in a terminus-specific form and skipping terminus selection.\n");
1746 if(bTerMan && ntdblist>1)
1748 sprintf(select,"Select end terminus type for %s-%d",
1749 *pdba->resinfo[cc->r_end[i]].name,
1750 pdba->resinfo[cc->r_end[i]].nr);
1751 cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
1755 cc->ctdb[i] = tdblist[0];
1757 printf("End terminus %s-%d: %s\n",
1758 *pdba->resinfo[cc->r_end[i]].name,
1759 pdba->resinfo[cc->r_end[i]].nr,
1760 (cc->ctdb[i])->name);
1769 /* lookup hackblocks and rtp for all residues */
1770 get_hackblocks_rtp(&hb_chain, &restp_chain,
1771 nrtp, restp, pdba->nres, pdba->resinfo,
1772 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1773 /* ideally, now we would not need the rtp itself anymore, but do
1774 everything using the hb and restp arrays. Unfortunately, that
1775 requires some re-thinking of code in gen_vsite.c, which I won't
1776 do now :( AF 26-7-99 */
1778 rename_atoms(NULL,ffdir,
1779 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1781 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1784 block = new_blocka();
1786 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1787 natom,&pdba,&x,block,&gnames);
1788 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1789 if (ftp2bSet(efNDX,NFILE,fnm)) {
1791 fprintf(stderr,"WARNING: with the -remh option the generated "
1792 "index file (%s) might be useless\n"
1793 "(the index file is generated before hydrogens are added)",
1794 ftp2fn(efNDX,NFILE,fnm));
1796 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1798 for(i=0; i < block->nr; i++)
1803 fprintf(stderr,"WARNING: "
1804 "without sorting no check for duplicate atoms can be done\n");
1807 /* Generate Hydrogen atoms (and termini) in the sequence */
1808 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1809 natom=add_h(&pdba,&x,nah,ah,
1810 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1811 NULL,NULL,TRUE,FALSE);
1812 printf("Now there are %d residues with %d atoms\n",
1813 pdba->nres,pdba->nr);
1814 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1817 for(i=0; (i<natom); i++)
1818 fprintf(debug,"Res %s%d atom %d %s\n",
1819 *(pdba->resinfo[pdba->atom[i].resind].name),
1820 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1822 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1824 /* make up molecule name(s) */
1826 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1828 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1834 sprintf(molname,"Water");
1838 this_chainid = cc->chainid;
1840 /* Add the chain id if we have one */
1841 if(this_chainid != ' ')
1843 sprintf(buf,"_chain_%c",this_chainid);
1847 /* Check if there have been previous chains with the same id */
1849 for(k=0;k<chain;k++)
1851 if(cc->chainid == chains[k].chainid)
1856 /* Add the number for this chain identifier if there are multiple copies */
1860 sprintf(buf,"%d",nid_used+1);
1864 if(strlen(suffix)>0)
1866 sprintf(molname,"%s%s",p_restype,suffix);
1870 strcpy(molname,p_restype);
1874 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1876 strcpy(itp_fn,top_fn);
1877 printf("Chain time...\n");
1878 c=strrchr(itp_fn,'.');
1879 sprintf(c,"_%s.itp",molname);
1880 c=strrchr(posre_fn,'.');
1881 sprintf(c,"_%s.itp",molname);
1882 if (strcmp(itp_fn,posre_fn) == 0) {
1883 strcpy(buf_fn,posre_fn);
1884 c = strrchr(buf_fn,'.');
1886 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1890 srenew(incls,nincl);
1891 incls[nincl-1]=strdup(itp_fn);
1892 itp_file=gmx_fio_fopen(itp_fn,"w");
1896 srenew(mols,nmol+1);
1898 mols[nmol].name = strdup("SOL");
1899 mols[nmol].nr = pdba->nres;
1901 mols[nmol].name = strdup(molname);
1907 print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
1917 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1919 restp_chain,hb_chain,
1920 cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
1921 bVsites,bVsiteAromatics,forcefield,ffdir,
1922 mHmult,nssbonds,ssbonds,
1923 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1924 bRenumRes,bRTPresname);
1927 write_posres(posre_fn,pdba,posre_fc);
1930 gmx_fio_fclose(itp_file);
1932 /* pdba and natom have been reassigned somewhere so: */
1937 if (cc->chainid == ' ')
1938 sprintf(fn,"chain.pdb");
1940 sprintf(fn,"chain_%c.pdb",cc->chainid);
1941 cool_quote(quote,255,NULL);
1942 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1946 if (watermodel == NULL) {
1947 for(chain=0; chain<nch; chain++) {
1948 if (chains[chain].bAllWat) {
1949 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1953 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1954 if (!fflib_fexist(buf_fn)) {
1955 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1960 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1961 gmx_fio_fclose(top_file);
1963 gmx_residuetype_destroy(rt);
1965 /* now merge all chains back together */
1968 for (i=0; (i<nch); i++) {
1969 natom+=chains[i].pdba->nr;
1970 nres+=chains[i].pdba->nres;
1973 init_t_atoms(atoms,natom,FALSE);
1974 for(i=0; i < atoms->nres; i++)
1975 sfree(atoms->resinfo[i].name);
1976 sfree(atoms->resinfo);
1978 snew(atoms->resinfo,nres);
1982 for (i=0; (i<nch); i++) {
1984 printf("Including chain %d in system: %d atoms %d residues\n",
1985 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1986 for (j=0; (j<chains[i].pdba->nr); j++) {
1987 atoms->atom[k]=chains[i].pdba->atom[j];
1988 atoms->atom[k].resind += l; /* l is processed nr of residues */
1989 atoms->atomname[k]=chains[i].pdba->atomname[j];
1990 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1991 copy_rvec(chains[i].x[j],x[k]);
1994 for (j=0; (j<chains[i].pdba->nres); j++) {
1995 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1997 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2004 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
2005 print_sums(atoms, TRUE);
2008 fprintf(stderr,"\nWriting coordinate file...\n");
2009 clear_rvec(box_space);
2011 gen_box(0,atoms->nr,x,box,box_space,FALSE);
2012 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
2014 printf("\t\t--------- PLEASE NOTE ------------\n");
2015 printf("You have successfully generated a topology from: %s.\n",
2016 opt2fn("-f",NFILE,fnm));
2017 if (watermodel != NULL) {
2018 printf("The %s force field and the %s water model are used.\n",
2021 printf("The %s force field is used.\n",
2024 printf("\t\t--------- ETON ESAELP ------------\n");