1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
84 static const char *res2bb_notermini(const char *name,
85 int nrr,const rtprename_t *rr)
87 /* NOTE: This function returns the main building block name,
88 * it does not take terminal renaming into account.
93 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
97 return (i < nrr ? rr[i].main : name);
100 static const char *select_res(int nr,int resnr,
101 const char *name[],const char *expl[],
103 int nrr,const rtprename_t *rr)
107 printf("Which %s type do you want for residue %d\n",title,resnr+1);
108 for(sel=0; (sel < nr); sel++) {
109 printf("%d. %s (%s)\n",
110 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
112 printf("\nType a number:"); fflush(stdout);
114 if (scanf("%d",&sel) != 1)
115 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
120 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
122 enum { easp, easpH, easpNR };
123 const char *lh[easpNR] = { "ASP", "ASPH" };
124 const char *expl[easpNR] = {
125 "Not protonated (charge -1)",
126 "Protonated (charge 0)"
129 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
132 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
134 enum { eglu, egluH, egluNR };
135 const char *lh[egluNR] = { "GLU", "GLUH" };
136 const char *expl[egluNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
144 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
146 enum { egln, eglnH, eglnNR };
147 const char *lh[eglnNR] = { "GLN", "QLN" };
148 const char *expl[eglnNR] = {
149 "Not protonated (charge 0)",
150 "Protonated (charge +1)"
153 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
156 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
158 enum { elys, elysH, elysNR };
159 const char *lh[elysNR] = { "LYSN", "LYS" };
160 const char *expl[elysNR] = {
161 "Not protonated (charge 0)",
162 "Protonated (charge +1)"
165 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
168 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
170 enum { earg, eargH, eargNR };
171 const char *lh[eargNR] = { "ARGN", "ARG" };
172 const char *expl[eargNR] = {
173 "Not protonated (charge 0)",
174 "Protonated (charge +1)"
177 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
180 static const char *get_cystp(int resnr,int nrr,const rtprename_t *rr)
182 enum { ecys, ecysH, ecysNR };
183 const char *lh[ecysNR] = { "CYS2", "CYS" };
184 const char *expl[ecysNR] = {
185 "Cysteine in disulfide bridge",
189 return select_res(ecysNR,resnr,lh,expl,"CYSTEINE",nrr,rr);
193 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
195 const char *expl[ehisNR] = {
202 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
205 static void read_rtprename(const char *fname,FILE *fp,
206 int *nrtprename,rtprename_t **rtprename)
208 char line[STRLEN],buf[STRLEN];
217 while(get_a_line(fp,line,STRLEN)) {
219 nc = sscanf(line,"%s %s %s %s %s %s",
220 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
222 if (nc != 2 && nc != 5) {
223 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
226 } else if (nc != ncol) {
227 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
231 /* This file does not have special termini names, copy them from main */
232 strcpy(rr[n].nter,rr[n].main);
233 strcpy(rr[n].cter,rr[n].main);
234 strcpy(rr[n].bter,rr[n].main);
244 static char *search_resrename(int nrr,rtprename_t *rr,
246 gmx_bool bStart,gmx_bool bEnd,
247 gmx_bool bCompareFFRTPname)
255 while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx) != 0) ||
256 ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
261 /* If found in the database, rename this residue's rtp building block,
262 * otherwise keep the old name.
284 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
292 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
293 int nrr,rtprename_t *rr,t_symtab *symtab,
297 gmx_bool bStart,bEnd;
299 gmx_bool bFFRTPTERRNM;
301 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
303 for(r=0; r<pdba->nres; r++)
307 for(j=0; j<nterpairs; j++)
314 for(j=0; j<nterpairs; j++)
322 nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
324 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
326 /* This is a terminal residue, but the residue name,
327 * currently stored in .rtp, is not a standard residue name,
328 * but probably a force field specific rtp name.
329 * Check if we need to rename it because it is terminal.
331 nn = search_resrename(nrr,rr,
332 *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
335 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
339 printf("Changing rtp entry of residue %d %s to '%s'\n",
340 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
342 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
347 static void pdbres_to_gmxrtp(t_atoms *pdba)
351 for(i=0; (i<pdba->nres); i++)
353 if (pdba->resinfo[i].rtp == NULL)
355 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
360 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
361 gmx_bool bFullCompare,t_symtab *symtab)
366 for(i=0; (i<pdba->nres); i++)
368 resnm = *pdba->resinfo[i].name;
369 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
370 (!bFullCompare && strstr(resnm,oldnm) != NULL))
372 /* Rename the residue name (not the rtp name) */
373 pdba->resinfo[i].name = put_symtab(symtab,newnm);
378 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
379 gmx_bool bFullCompare,t_symtab *symtab)
384 for(i=0; (i<pdba->nres); i++)
386 /* We have not set the rtp name yes, use the residue name */
387 bbnm = *pdba->resinfo[i].name;
388 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
389 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
391 /* Change the rtp builing block name */
392 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
397 static void rename_bbint(t_atoms *pdba,const char *oldnm,
398 const char *gettp(int,int,const rtprename_t *),
399 gmx_bool bFullCompare,
401 int nrr,const rtprename_t *rr)
407 for(i=0; i<pdba->nres; i++)
409 /* We have not set the rtp name yes, use the residue name */
410 bbnm = *pdba->resinfo[i].name;
411 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
412 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
414 ptr = gettp(i,nrr,rr);
415 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
420 static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
426 ftp = fn2ftp(filename);
427 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
428 fprintf(stderr,"No occupancies in %s\n",filename);
430 for(i=0; (i<atoms->nr); i++) {
431 if (atoms->pdbinfo[i].occup != 1) {
433 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
434 *atoms->resinfo[atoms->atom[i].resind].name,
435 atoms->resinfo[ atoms->atom[i].resind].nr,
437 atoms->pdbinfo[i].occup);
438 if (atoms->pdbinfo[i].occup == 0)
444 if (nzero == atoms->nr) {
445 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
446 } else if ((nzero > 0) || (nnotone > 0)) {
449 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
450 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
452 nzero,nnotone,atoms->nr);
454 fprintf(stderr,"All occupancies are one\n");
459 void write_posres(char *fn,t_atoms *pdba,real fc)
464 fp=gmx_fio_fopen(fn,"w");
466 "; In this topology include file, you will find position restraint\n"
467 "; entries for all the heavy atoms in your original pdb file.\n"
468 "; This means that all the protons which were added by pdb2gmx are\n"
469 "; not restrained.\n"
471 "[ position_restraints ]\n"
472 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
474 for(i=0; (i<pdba->nr); i++) {
475 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
476 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
481 static int read_pdball(const char *inf, const char *outf,char *title,
482 t_atoms *atoms, rvec **x,
483 int *ePBC,matrix box, gmx_bool bRemoveH,
484 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
485 gmx_atomprop_t aps,gmx_bool bVerbose)
486 /* Read a pdb file. (containing proteins) */
488 int natom,new_natom,i;
491 printf("Reading %s...\n",inf);
492 get_stx_coordnum(inf,&natom);
493 init_t_atoms(atoms,natom,TRUE);
495 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
496 if (fn2ftp(inf) == efPDB)
497 get_pdb_atomnumber(atoms,aps);
500 for(i=0; i<atoms->nr; i++)
501 if (!is_hydrogen(*atoms->atomname[i])) {
502 atoms->atom[new_natom]=atoms->atom[i];
503 atoms->atomname[new_natom]=atoms->atomname[i];
504 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
505 copy_rvec((*x)[i],(*x)[new_natom]);
513 if (title && title[0])
514 printf(" '%s',",title);
515 printf(" %d atoms\n",natom);
517 /* Rename residues */
518 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
519 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
520 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
522 rename_atoms("xlateat.dat",NULL,
523 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
529 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
534 void process_chain(t_atoms *pdba, rvec *x,
535 gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
536 gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
537 gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
538 real angle,real distance,t_symtab *symtab,
539 int nrr,const rtprename_t *rr)
541 /* Rename aromatics, lys, asp and histidine */
542 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
543 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
544 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
546 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
548 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
550 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
552 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
554 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
556 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
558 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
561 set_histp(pdba,x,angle,distance);
563 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
565 /* Initialize the rtp builing block names with the residue names
566 * for the residues that have not been processed above.
568 pdbres_to_gmxrtp(pdba);
570 /* Now we have all rtp names set.
571 * The rtp names will conform to Gromacs naming,
572 * unless the input pdb file contained one or more force field specific
573 * rtp names as residue names.
577 /* struct for sorting the atoms from the pdb file */
579 int resnr; /* residue number */
580 int j; /* database order index */
581 int index; /* original atom number */
582 char anm1; /* second letter of atom name */
583 char altloc; /* alternate location indicator */
586 int pdbicomp(const void *a,const void *b)
594 d = (pa->resnr - pb->resnr);
598 d = (pa->anm1 - pb->anm1);
600 d = (pa->altloc - pb->altloc);
607 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
608 int natoms,t_atoms **pdbaptr,rvec **x,
609 t_blocka *block,char ***gnames)
611 t_atoms *pdba,*pdbnew;
626 for(i=0; i<natoms; i++)
628 atomnm = *pdba->atomname[i];
629 rptr = &restp[pdba->atom[i].resind];
630 for(j=0; (j<rptr->natom); j++)
632 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
642 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
643 "while sorting atoms.\n%s",atomnm,
644 *pdba->resinfo[pdba->atom[i].resind].name,
645 pdba->resinfo[pdba->atom[i].resind].nr,
648 is_hydrogen(atomnm) ?
649 "\nFor a hydrogen, this can be a different protonation state, or it\n"
650 "might have had a different number in the PDB file and was rebuilt\n"
651 "(it might for instance have been H3, and we only expected H1 & H2).\n"
652 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
653 "Remove this hydrogen or choose a different protonation state to solve it.\n"
654 "Option -ignh will ignore all hydrogens in the input." : ".");
655 gmx_fatal(FARGS,buf);
657 /* make shadow array to be sorted into indexgroup */
658 pdbi[i].resnr = pdba->atom[i].resind;
661 pdbi[i].anm1 = atomnm[1];
662 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
664 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
666 /* pdba is sorted in pdbnew using the pdbi index */
669 init_t_atoms(pdbnew,natoms,TRUE);
672 pdbnew->nres=pdba->nres;
673 sfree(pdbnew->resinfo);
674 pdbnew->resinfo=pdba->resinfo;
675 for (i=0; i<natoms; i++) {
676 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
677 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
678 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
679 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
680 /* make indexgroup in block */
684 sfree(pdba->atomname);
686 sfree(pdba->pdbinfo);
689 /* copy the sorted pdbnew back to pdba */
692 add_grp(block, gnames, natoms, a, "prot_sort");
698 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
700 int i,j,oldnatoms,ndel;
703 printf("Checking for duplicate atoms....\n");
704 oldnatoms = pdba->nr;
706 /* NOTE: pdba->nr is modified inside the loop */
707 for(i=1; (i < pdba->nr); i++) {
708 /* compare 'i' and 'i-1', throw away 'i' if they are identical
709 this is a 'while' because multiple alternate locations can be present */
710 while ( (i < pdba->nr) &&
711 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
712 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
715 ri = &pdba->resinfo[pdba->atom[i].resind];
716 printf("deleting duplicate atom %4s %s%4d%c",
717 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
718 if (ri->chainid && (ri->chainid != ' '))
719 printf(" ch %c", ri->chainid);
721 if (pdba->pdbinfo[i].atomnr)
722 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
723 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
724 printf(" altloc %c",pdba->pdbinfo[i].altloc);
729 /* We can not free, since it might be in the symtab */
730 /* sfree(pdba->atomname[i]); */
731 for (j=i; j < pdba->nr; j++) {
732 pdba->atom[j] = pdba->atom[j+1];
733 pdba->atomname[j] = pdba->atomname[j+1];
734 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
735 copy_rvec(x[j+1],x[j]);
737 srenew(pdba->atom, pdba->nr);
738 /* srenew(pdba->atomname, pdba->nr); */
739 srenew(pdba->pdbinfo, pdba->nr);
742 if (pdba->nr != oldnatoms)
743 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
748 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
751 const char *p_startrestype;
752 const char *p_restype;
753 int nstartwarn,nendwarn;
761 /* Find the starting terminus (typially N or 5') */
762 for(i=r0;i<r1 && *r_start==-1;i++)
764 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
765 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
767 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
774 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
778 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
786 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
787 for(i=*r_start;i<r1;i++)
789 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
790 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
798 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
799 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
800 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
804 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
813 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
819 modify_chain_numbers(t_atoms * pdba,
820 const char * chainsep)
823 char old_prev_chainid;
824 char old_this_chainid;
825 int old_prev_chainnum;
826 int old_this_chainnum;
832 const char * prev_atomname;
833 const char * this_atomname;
834 const char * prev_resname;
835 const char * this_resname;
840 int prev_chainnumber;
841 int this_chainnumber;
853 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
855 /* Be a bit flexible to catch typos */
856 if (!strncmp(chainsep,"id_o",4))
858 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
859 splitting = SPLIT_ID_OR_TER;
860 printf("Splitting chemical chains based on TER records or chain id changing.\n");
862 else if (!strncmp(chainsep,"int",3))
864 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
865 splitting = SPLIT_INTERACTIVE;
866 printf("Splitting chemical chains interactively.\n");
868 else if (!strncmp(chainsep,"id_a",4))
870 splitting = SPLIT_ID_AND_TER;
871 printf("Splitting chemical chains based on TER records and chain id changing.\n");
873 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
875 splitting = SPLIT_ID_ONLY;
876 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
878 else if (chainsep[0]=='t')
880 splitting = SPLIT_TER_ONLY;
881 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
885 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
888 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
890 old_prev_chainid = '?';
891 old_prev_chainnum = -1;
894 this_atomname = NULL;
899 this_chainnumber = -1;
901 for(i=0;i<pdba->nres;i++)
903 ri = &pdba->resinfo[i];
904 old_this_chainid = ri->chainid;
905 old_this_chainnum = ri->chainnum;
907 prev_atomname = this_atomname;
908 prev_atomnum = this_atomnum;
909 prev_resname = this_resname;
910 prev_resnum = this_resnum;
911 prev_chainid = this_chainid;
912 prev_chainnumber = this_chainnumber;
914 this_atomname = *(pdba->atomname[i]);
915 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
916 this_resname = *ri->name;
917 this_resnum = ri->nr;
918 this_chainid = ri->chainid;
919 this_chainnumber = ri->chainnum;
923 case SPLIT_ID_OR_TER:
924 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
930 case SPLIT_ID_AND_TER:
931 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
938 if(old_this_chainid != old_prev_chainid)
945 if(old_this_chainnum != old_prev_chainnum)
950 case SPLIT_INTERACTIVE:
951 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
955 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
956 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
957 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
958 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
960 if(NULL==fgets(select,STRLEN-1,stdin))
962 gmx_fatal(FARGS,"Error reading from stdin");
965 if(i==0 || select[0] == 'y')
972 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
975 old_prev_chainid = old_this_chainid;
976 old_prev_chainnum = old_this_chainnum;
978 ri->chainnum = new_chainnum;
1007 int main(int argc, char *argv[])
1009 const char *desc[] = {
1010 "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1011 "some database files, adds hydrogens to the molecules and generates",
1012 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1013 "and a topology in GROMACS format.",
1014 "These files can subsequently be processed to generate a run input file.",
1016 "[TT]pdb2gmx[tt] will search for force fields by looking for",
1017 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1018 "of the current working directory and of the GROMACS library directory",
1019 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1021 "By default the forcefield selection is interactive,",
1022 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1023 "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
1024 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1026 "After choosing a force field, all files will be read only from",
1027 "the corresponding force field directory.",
1028 "If you want to modify or add a residue types, you can copy the force",
1029 "field directory from the GROMACS library directory to your current",
1030 "working directory. If you want to add new protein residue types,",
1031 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1032 "or copy the whole library directory to a local directory and set",
1033 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1034 "Check Chapter 5 of the manual for more information about file formats.",
1037 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1038 "need not necessarily contain a protein structure. Every kind of",
1039 "molecule for which there is support in the database can be converted.",
1040 "If there is no support in the database, you can add it yourself.[PAR]",
1042 "The program has limited intelligence, it reads a number of database",
1043 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1044 "if necessary this can be done manually. The program can prompt the",
1045 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1046 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1047 "protonated (three protons, default), for Asp and Glu unprotonated",
1048 "(default) or protonated, for His the proton can be either on ND1,",
1049 "on NE2 or on both. By default these selections are done automatically.",
1050 "For His, this is based on an optimal hydrogen bonding",
1051 "conformation. Hydrogen bonds are defined based on a simple geometric",
1052 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1053 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1054 "[TT]-dist[tt] respectively.[PAR]",
1056 "The protonation state of N- and C-termini can be chosen interactively",
1057 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1058 "respectively. Some force fields support zwitterionic forms for chains of",
1059 "one residue, but for polypeptides these options should NOT be selected.",
1060 "The AMBER force fields have unique forms for the terminal residues,",
1061 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1062 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1063 "respectively to use these forms, making sure you preserve the format",
1064 "of the coordinate file. Alternatively, use named terminating residues",
1065 "(e.g. ACE, NME).[PAR]",
1067 "The separation of chains is not entirely trivial since the markup",
1068 "in user-generated PDB files frequently varies and sometimes it",
1069 "is desirable to merge entries across a TER record, for instance",
1070 "if you want a disulfide bridge or distance restraints between",
1071 "two protein chains or if you have a HEME group bound to a protein.",
1072 "In such cases multiple chains should be contained in a single",
1073 "[TT]moleculetype[tt] definition.",
1074 "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
1075 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1076 "start, and termini added when applicable. This can be done based on the",
1077 "existence of TER records, when the chain id changes, or combinations of either",
1078 "or both of these. You can also do the selection fully interactively.",
1079 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1080 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1081 "This can be turned off (no merging), all non-water chains can be merged into a",
1082 "single molecule, or the selection can be done interactively.[PAR]",
1084 "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
1085 "If any of the occupancies are not one, indicating that the atom is",
1086 "not resolved well in the structure, a warning message is issued.",
1087 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1088 "all occupancy fields may be zero. Either way, it is up to the user",
1089 "to verify the correctness of the input data (read the article!).[PAR]",
1091 "During processing the atoms will be reordered according to GROMACS",
1092 "conventions. With [TT]-n[tt] an index file can be generated that",
1093 "contains one group reordered in the same way. This allows you to",
1094 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1095 "one limitation: reordering is done after the hydrogens are stripped",
1096 "from the input and before new hydrogens are added. This means that",
1097 "you should not use [TT]-ignh[tt].[PAR]",
1099 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1100 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1101 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1104 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1105 "motions. Angular and out-of-plane motions can be removed by changing",
1106 "hydrogens into virtual sites and fixing angles, which fixes their",
1107 "position relative to neighboring atoms. Additionally, all atoms in the",
1108 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1109 "can be converted into virtual sites, eliminating the fast improper dihedral",
1110 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1111 "atoms are also converted to virtual sites. The mass of all atoms that are",
1112 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1113 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1114 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1115 "done for water hydrogens to slow down the rotational motion of water.",
1116 "The increase in mass of the hydrogens is subtracted from the bonded",
1117 "(heavy) atom so that the total mass of the system remains the same."
1121 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
1123 t_atoms pdba_all,*pdba;
1127 int chain,nch,maxch,nwaterchain;
1129 t_chain *chains,*cc;
1130 char select[STRLEN];
1143 gpp_atomtype_t atype;
1144 gmx_residuetype_t rt;
1146 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1147 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1148 char *c,forcefield[STRLEN],ffdir[STRLEN];
1149 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1158 rtprename_t *rtprename=NULL;
1159 int nah,nNtdb,nCtdb,ntdblist;
1160 t_hackblock *ntdb,*ctdb,**tdblist;
1164 gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
1166 t_hackblock *hb_chain;
1167 t_restp *restp_chain;
1169 const char *p_restype;
1173 const char * prev_atomname;
1174 const char * this_atomname;
1175 const char * prev_resname;
1176 const char * this_resname;
1181 int prev_chainnumber;
1182 int this_chainnumber;
1184 int this_chainstart;
1185 int prev_chainstart;
1192 { efSTX, "-f", "eiwit.pdb", ffREAD },
1193 { efSTO, "-o", "conf", ffWRITE },
1194 { efTOP, NULL, NULL, ffWRITE },
1195 { efITP, "-i", "posre", ffWRITE },
1196 { efNDX, "-n", "clean", ffOPTWR },
1197 { efSTO, "-q", "clean.pdb", ffOPTWR }
1199 #define NFILE asize(fnm)
1202 /* Command line arguments must be static */
1203 static gmx_bool bNewRTP=FALSE;
1204 static gmx_bool bInter=FALSE, bCysMan=FALSE;
1205 static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1206 static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
1207 static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1208 static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1209 static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1210 static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
1211 static real angle=135.0, distance=0.3,posre_fc=1000;
1212 static real long_bond_dist=0.25, short_bond_dist=0.05;
1213 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1214 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1215 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1216 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1217 static const char *ff = "select";
1220 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1221 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1222 { "-lb", FALSE, etREAL, {&long_bond_dist},
1223 "HIDDENLong bond warning distance" },
1224 { "-sb", FALSE, etREAL, {&short_bond_dist},
1225 "HIDDENShort bond warning distance" },
1226 { "-chainsep", FALSE, etENUM, {chainsep},
1227 "Condition in PDB files when a new chain should be started (adding termini)" },
1228 { "-merge", FALSE, etENUM, {&merge},
1229 "Merge multiple chains into a single [moleculetype]" },
1230 { "-ff", FALSE, etSTR, {&ff},
1231 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1232 { "-water", FALSE, etENUM, {watstr},
1233 "Water model to use" },
1234 { "-inter", FALSE, etBOOL, {&bInter},
1235 "Set the next 8 options to interactive"},
1236 { "-ss", FALSE, etBOOL, {&bCysMan},
1237 "Interactive SS bridge selection" },
1238 { "-ter", FALSE, etBOOL, {&bTerMan},
1239 "Interactive termini selection, instead of charged (default)" },
1240 { "-lys", FALSE, etBOOL, {&bLysMan},
1241 "Interactive lysine selection, instead of charged" },
1242 { "-arg", FALSE, etBOOL, {&bArgMan},
1243 "Interactive arginine selection, instead of charged" },
1244 { "-asp", FALSE, etBOOL, {&bAspMan},
1245 "Interactive aspartic acid selection, instead of charged" },
1246 { "-glu", FALSE, etBOOL, {&bGluMan},
1247 "Interactive glutamic acid selection, instead of charged" },
1248 { "-gln", FALSE, etBOOL, {&bGlnMan},
1249 "Interactive glutamine selection, instead of neutral" },
1250 { "-his", FALSE, etBOOL, {&bHisMan},
1251 "Interactive histidine selection, instead of checking H-bonds" },
1252 { "-angle", FALSE, etREAL, {&angle},
1253 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1254 { "-dist", FALSE, etREAL, {&distance},
1255 "Maximum donor-acceptor distance for a H-bond (nm)" },
1256 { "-una", FALSE, etBOOL, {&bUnA},
1257 "Select aromatic rings with united CH atoms on phenylalanine, "
1258 "tryptophane and tyrosine" },
1259 { "-sort", FALSE, etBOOL, {&bSort},
1260 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1261 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1262 "Ignore hydrogen atoms that are in the coordinate file" },
1263 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1264 "Continue when atoms are missing, dangerous" },
1265 { "-v", FALSE, etBOOL, {&bVerbose},
1266 "Be slightly more verbose in messages" },
1267 { "-posrefc",FALSE, etREAL, {&posre_fc},
1268 "Force constant for position restraints" },
1269 { "-vsite", FALSE, etENUM, {vsitestr},
1270 "Convert atoms to virtual sites" },
1271 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1272 "Make hydrogen atoms heavy" },
1273 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1274 "Change the mass of hydrogens to 2 amu" },
1275 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1276 "Use charge groups in the [TT].rtp[tt] file" },
1277 { "-cmap", TRUE, etBOOL, {&bCmap},
1278 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1279 { "-renum", TRUE, etBOOL, {&bRenumRes},
1280 "Renumber the residues consecutively in the output" },
1281 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1282 "Use [TT].rtp[tt] entry names as residue names" }
1284 #define NPARGS asize(pa)
1286 CopyRight(stderr,argv[0]);
1287 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1290 /* Force field selection, interactive or direct */
1291 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1292 forcefield,sizeof(forcefield),
1293 ffdir,sizeof(ffdir));
1295 if (strlen(forcefield) > 0) {
1296 strcpy(ffname,forcefield);
1297 ffname[0] = toupper(ffname[0]);
1299 gmx_fatal(FARGS,"Empty forcefield string");
1302 printf("\nUsing the %s force field in directory %s\n\n",
1305 choose_watermodel(watstr[0],ffdir,&watermodel);
1308 /* if anything changes here, also change description of -inter */
1321 else if (bDeuterate)
1326 switch(vsitestr[0][0]) {
1327 case 'n': /* none */
1329 bVsiteAromatics=FALSE;
1331 case 'h': /* hydrogens */
1333 bVsiteAromatics=FALSE;
1335 case 'a': /* aromatics */
1337 bVsiteAromatics=TRUE;
1340 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1341 __FILE__,__LINE__,vsitestr[0]);
1344 /* Open the symbol table */
1345 open_symtab(&symtab);
1347 /* Residue type database */
1348 gmx_residuetype_init(&rt);
1350 /* Read residue renaming database(s), if present */
1351 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1355 for(i=0; i<nrrn; i++) {
1356 fp = fflib_open(rrn[i]);
1357 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1363 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1365 for(i=0;i<nrtprename;i++)
1367 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1369 /* Only add names if the 'standard' gromacs/iupac base name was found */
1372 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1373 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1374 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1375 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1380 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1381 strstr(watermodel,"4P"))) {
1383 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1384 strstr(watermodel,"5P"))) {
1390 aps = gmx_atomprop_init();
1391 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1392 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1396 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1398 printf("Analyzing pdb file\n");
1403 modify_chain_numbers(&pdba_all,chainsep[0]);
1407 this_atomname = NULL;
1409 this_resname = NULL;
1412 this_chainnumber = -1;
1413 this_chainstart = 0;
1414 /* Keep the compiler happy */
1415 prev_chainstart = 0;
1420 for (i=0; (i<natom); i++)
1422 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1424 prev_atomname = this_atomname;
1425 prev_atomnum = this_atomnum;
1426 prev_resname = this_resname;
1427 prev_resnum = this_resnum;
1428 prev_chainid = this_chainid;
1429 prev_chainnumber = this_chainnumber;
1432 prev_chainstart = this_chainstart;
1435 this_atomname = *pdba_all.atomname[i];
1436 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1437 this_resname = *ri->name;
1438 this_resnum = ri->nr;
1439 this_chainid = ri->chainid;
1440 this_chainnumber = ri->chainnum;
1442 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1443 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1445 this_chainstart = pdba_all.atom[i].resind;
1450 if(!strncmp(merge[0],"int",3))
1452 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1453 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1454 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1455 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1457 if(NULL==fgets(select,STRLEN-1,stdin))
1459 gmx_fatal(FARGS,"Error reading from stdin");
1461 bMerged = (select[0] == 'y');
1463 else if(!strncmp(merge[0],"all",3))
1471 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1472 pdba_all.atom[i].resind - prev_chainstart;
1473 pdb_ch[nch-1].nterpairs++;
1474 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1479 /* set natom for previous chain */
1482 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1489 /* check if chain identifier was used before */
1490 for (j=0; (j<nch); j++)
1492 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1494 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1495 "They will be treated as separate chains unless you reorder your file.\n",
1502 srenew(pdb_ch,maxch);
1504 pdb_ch[nch].chainid = ri->chainid;
1505 pdb_ch[nch].chainnum = ri->chainnum;
1506 pdb_ch[nch].start=i;
1507 pdb_ch[nch].bAllWat=bWat;
1509 pdb_ch[nch].nterpairs=0;
1511 pdb_ch[nch].nterpairs=1;
1512 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1513 /* modified [nch] to [0] below */
1514 pdb_ch[nch].chainstart[0]=0;
1520 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1522 /* set all the water blocks at the end of the chain */
1523 snew(swap_index,nch);
1525 for(i=0; i<nch; i++)
1526 if (!pdb_ch[i].bAllWat) {
1530 for(i=0; i<nch; i++)
1531 if (pdb_ch[i].bAllWat) {
1536 printf("Moved all the water blocks to the end\n");
1539 /* copy pdb data and x for all chains */
1540 for (i=0; (i<nch); i++) {
1542 chains[i].chainid = pdb_ch[si].chainid;
1543 chains[i].chainnum = pdb_ch[si].chainnum;
1544 chains[i].bAllWat = pdb_ch[si].bAllWat;
1545 chains[i].nterpairs = pdb_ch[si].nterpairs;
1546 chains[i].chainstart = pdb_ch[si].chainstart;
1547 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1548 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1549 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1550 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1552 snew(chains[i].pdba,1);
1553 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1554 snew(chains[i].x,chains[i].pdba->nr);
1555 for (j=0; j<chains[i].pdba->nr; j++) {
1556 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1557 snew(chains[i].pdba->atomname[j],1);
1558 *chains[i].pdba->atomname[j] =
1559 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1560 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1561 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1563 /* Re-index the residues assuming that the indices are continuous */
1564 k = chains[i].pdba->atom[0].resind;
1565 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1566 chains[i].pdba->nres = nres;
1567 for(j=0; j < chains[i].pdba->nr; j++) {
1568 chains[i].pdba->atom[j].resind -= k;
1570 srenew(chains[i].pdba->resinfo,nres);
1571 for(j=0; j<nres; j++) {
1572 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1573 snew(chains[i].pdba->resinfo[j].name,1);
1574 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1575 /* make all chain identifiers equal to that of the chain */
1576 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1581 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1584 printf("There are %d chains and %d blocks of water and "
1585 "%d residues with %d atoms\n",
1586 nch-nwaterchain,nwaterchain,
1587 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1589 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1590 for (i=0; (i<nch); i++)
1591 printf(" %d '%c' %5d %6d %s\n",
1592 i+1, chains[i].chainid ? chains[i].chainid:'-',
1593 chains[i].pdba->nres, chains[i].pdba->nr,
1594 chains[i].bAllWat ? "(only water)":"");
1597 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1599 /* Read atomtypes... */
1600 atype = read_atype(ffdir,&symtab);
1602 /* read residue database */
1603 printf("Reading residue database... (%s)\n",forcefield);
1604 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1607 for(i=0; i<nrtpf; i++) {
1608 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1613 /* Not correct with multiple rtp input files with different bonded types */
1614 fp=gmx_fio_fopen("new.rtp","w");
1615 print_resall(fp,nrtp,restp,atype);
1619 /* read hydrogen database */
1620 nah = read_h_db(ffdir,&ah);
1622 /* Read Termini database... */
1623 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1624 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1626 top_fn=ftp2fn(efTOP,NFILE,fnm);
1627 top_file=gmx_fio_fopen(top_fn,"w");
1629 sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
1631 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1638 for(chain=0; (chain<nch); chain++) {
1639 cc = &(chains[chain]);
1641 /* set pdba, natom and nres to the current chain */
1645 nres =cc->pdba->nres;
1647 if (cc->chainid && ( cc->chainid != ' ' ) )
1648 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1649 chain+1,cc->chainid,natom,nres);
1651 printf("Processing chain %d (%d atoms, %d residues)\n",
1652 chain+1,natom,nres);
1654 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1655 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1656 nrtprename,rtprename);
1658 cc->chainstart[cc->nterpairs] = pdba->nres;
1660 for(i=0; i<cc->nterpairs; i++)
1662 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1663 &(cc->r_start[j]),&(cc->r_end[j]),rt);
1665 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1671 if (cc->nterpairs == 0)
1673 printf("Problem with chain definition, or missing terminal residues.\n"
1674 "This chain does not appear to contain a recognized chain molecule.\n"
1675 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1678 /* Check for disulfides and other special bonds */
1679 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1681 if (nrtprename > 0) {
1682 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1688 sprintf(fn,"chain.pdb");
1690 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1692 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1696 for(i=0; i<cc->nterpairs; i++)
1700 * We first apply a filter so we only have the
1701 * termini that can be applied to the residue in question
1702 * (or a generic terminus if no-residue specific is available).
1704 /* First the N terminus */
1707 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1708 *pdba->resinfo[cc->r_start[i]].name,
1709 *pdba->resinfo[cc->r_start[i]].rtp,
1713 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1714 "is already in a terminus-specific form and skipping terminus selection.\n");
1719 if(bTerMan && ntdblist>1)
1721 sprintf(select,"Select start terminus type for %s-%d",
1722 *pdba->resinfo[cc->r_start[i]].name,
1723 pdba->resinfo[cc->r_start[i]].nr);
1724 cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
1728 cc->ntdb[i] = tdblist[0];
1731 printf("Start terminus %s-%d: %s\n",
1732 *pdba->resinfo[cc->r_start[i]].name,
1733 pdba->resinfo[cc->r_start[i]].nr,
1734 (cc->ntdb[i])->name);
1743 /* And the C terminus */
1746 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1747 *pdba->resinfo[cc->r_end[i]].name,
1748 *pdba->resinfo[cc->r_end[i]].rtp,
1752 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1753 "is already in a terminus-specific form and skipping terminus selection.\n");
1758 if(bTerMan && ntdblist>1)
1760 sprintf(select,"Select end terminus type for %s-%d",
1761 *pdba->resinfo[cc->r_end[i]].name,
1762 pdba->resinfo[cc->r_end[i]].nr);
1763 cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
1767 cc->ctdb[i] = tdblist[0];
1769 printf("End terminus %s-%d: %s\n",
1770 *pdba->resinfo[cc->r_end[i]].name,
1771 pdba->resinfo[cc->r_end[i]].nr,
1772 (cc->ctdb[i])->name);
1781 /* lookup hackblocks and rtp for all residues */
1782 get_hackblocks_rtp(&hb_chain, &restp_chain,
1783 nrtp, restp, pdba->nres, pdba->resinfo,
1784 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1785 /* ideally, now we would not need the rtp itself anymore, but do
1786 everything using the hb and restp arrays. Unfortunately, that
1787 requires some re-thinking of code in gen_vsite.c, which I won't
1788 do now :( AF 26-7-99 */
1790 rename_atoms(NULL,ffdir,
1791 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1793 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1796 block = new_blocka();
1798 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1799 natom,&pdba,&x,block,&gnames);
1800 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1801 if (ftp2bSet(efNDX,NFILE,fnm)) {
1803 fprintf(stderr,"WARNING: with the -remh option the generated "
1804 "index file (%s) might be useless\n"
1805 "(the index file is generated before hydrogens are added)",
1806 ftp2fn(efNDX,NFILE,fnm));
1808 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1810 for(i=0; i < block->nr; i++)
1815 fprintf(stderr,"WARNING: "
1816 "without sorting no check for duplicate atoms can be done\n");
1819 /* Generate Hydrogen atoms (and termini) in the sequence */
1820 natom=add_h(&pdba,&x,nah,ah,
1821 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1822 NULL,NULL,TRUE,FALSE);
1823 printf("Now there are %d residues with %d atoms\n",
1824 pdba->nres,pdba->nr);
1825 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1828 for(i=0; (i<natom); i++)
1829 fprintf(debug,"Res %s%d atom %d %s\n",
1830 *(pdba->resinfo[pdba->atom[i].resind].name),
1831 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1833 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1835 /* make up molecule name(s) */
1837 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1839 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1845 sprintf(molname,"Water");
1849 this_chainid = cc->chainid;
1851 /* Add the chain id if we have one */
1852 if(this_chainid != ' ')
1854 sprintf(buf,"_chain_%c",this_chainid);
1858 /* Check if there have been previous chains with the same id */
1860 for(k=0;k<chain;k++)
1862 if(cc->chainid == chains[k].chainid)
1867 /* Add the number for this chain identifier if there are multiple copies */
1871 sprintf(buf,"%d",nid_used+1);
1875 if(strlen(suffix)>0)
1877 sprintf(molname,"%s%s",p_restype,suffix);
1881 strcpy(molname,p_restype);
1885 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1887 strcpy(itp_fn,top_fn);
1888 printf("Chain time...\n");
1889 c=strrchr(itp_fn,'.');
1890 sprintf(c,"_%s.itp",molname);
1891 c=strrchr(posre_fn,'.');
1892 sprintf(c,"_%s.itp",molname);
1893 if (strcmp(itp_fn,posre_fn) == 0) {
1894 strcpy(buf_fn,posre_fn);
1895 c = strrchr(buf_fn,'.');
1897 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1901 srenew(incls,nincl);
1902 incls[nincl-1]=strdup(itp_fn);
1903 itp_file=gmx_fio_fopen(itp_fn,"w");
1907 srenew(mols,nmol+1);
1909 mols[nmol].name = strdup("SOL");
1910 mols[nmol].nr = pdba->nres;
1912 mols[nmol].name = strdup(molname);
1918 print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
1928 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1930 restp_chain,hb_chain,
1931 cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
1932 bVsites,bVsiteAromatics,forcefield,ffdir,
1933 mHmult,nssbonds,ssbonds,
1934 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1935 bRenumRes,bRTPresname);
1938 write_posres(posre_fn,pdba,posre_fc);
1941 gmx_fio_fclose(itp_file);
1943 /* pdba and natom have been reassigned somewhere so: */
1948 if (cc->chainid == ' ')
1949 sprintf(fn,"chain.pdb");
1951 sprintf(fn,"chain_%c.pdb",cc->chainid);
1952 cool_quote(quote,255,NULL);
1953 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1957 if (watermodel == NULL) {
1958 for(chain=0; chain<nch; chain++) {
1959 if (chains[chain].bAllWat) {
1960 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1964 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1965 if (!fflib_fexist(buf_fn)) {
1966 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1971 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1972 gmx_fio_fclose(top_file);
1974 gmx_residuetype_destroy(rt);
1976 /* now merge all chains back together */
1979 for (i=0; (i<nch); i++) {
1980 natom+=chains[i].pdba->nr;
1981 nres+=chains[i].pdba->nres;
1984 init_t_atoms(atoms,natom,FALSE);
1985 for(i=0; i < atoms->nres; i++)
1986 sfree(atoms->resinfo[i].name);
1987 sfree(atoms->resinfo);
1989 snew(atoms->resinfo,nres);
1993 for (i=0; (i<nch); i++) {
1995 printf("Including chain %d in system: %d atoms %d residues\n",
1996 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1997 for (j=0; (j<chains[i].pdba->nr); j++) {
1998 atoms->atom[k]=chains[i].pdba->atom[j];
1999 atoms->atom[k].resind += l; /* l is processed nr of residues */
2000 atoms->atomname[k]=chains[i].pdba->atomname[j];
2001 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2002 copy_rvec(chains[i].x[j],x[k]);
2005 for (j=0; (j<chains[i].pdba->nres); j++) {
2006 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2008 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2015 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
2016 print_sums(atoms, TRUE);
2019 fprintf(stderr,"\nWriting coordinate file...\n");
2020 clear_rvec(box_space);
2022 gen_box(0,atoms->nr,x,box,box_space,FALSE);
2023 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
2025 printf("\t\t--------- PLEASE NOTE ------------\n");
2026 printf("You have successfully generated a topology from: %s.\n",
2027 opt2fn("-f",NFILE,fnm));
2028 if (watermodel != NULL) {
2029 printf("The %s force field and the %s water model are used.\n",
2032 printf("The %s force field is used.\n",
2035 printf("\t\t--------- ETON ESAELP ------------\n");