1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
53 #include "gmx_fatal.h"
72 #include "fflibutil.h"
84 static const char *res2bb_notermini(const char *name,
85 int nrr,const rtprename_t *rr)
87 /* NOTE: This function returns the main building block name,
88 * it does not take terminal renaming into account.
93 while (i < nrr && gmx_strcasecmp(name,rr[i].gmx) != 0) {
97 return (i < nrr ? rr[i].main : name);
100 static const char *select_res(int nr,int resnr,
101 const char *name[],const char *expl[],
103 int nrr,const rtprename_t *rr)
107 printf("Which %s type do you want for residue %d\n",title,resnr+1);
108 for(sel=0; (sel < nr); sel++) {
109 printf("%d. %s (%s)\n",
110 sel,expl[sel],res2bb_notermini(name[sel],nrr,rr));
112 printf("\nType a number:"); fflush(stdout);
114 if (scanf("%d",&sel) != 1)
115 gmx_fatal(FARGS,"Answer me for res %s %d!",title,resnr+1);
120 static const char *get_asptp(int resnr,int nrr,const rtprename_t *rr)
122 enum { easp, easpH, easpNR };
123 const char *lh[easpNR] = { "ASP", "ASPH" };
124 const char *expl[easpNR] = {
125 "Not protonated (charge -1)",
126 "Protonated (charge 0)"
129 return select_res(easpNR,resnr,lh,expl,"ASPARTIC ACID",nrr,rr);
132 static const char *get_glutp(int resnr,int nrr,const rtprename_t *rr)
134 enum { eglu, egluH, egluNR };
135 const char *lh[egluNR] = { "GLU", "GLUH" };
136 const char *expl[egluNR] = {
137 "Not protonated (charge -1)",
138 "Protonated (charge 0)"
141 return select_res(egluNR,resnr,lh,expl,"GLUTAMIC ACID",nrr,rr);
144 static const char *get_glntp(int resnr,int nrr,const rtprename_t *rr)
146 enum { egln, eglnH, eglnNR };
147 const char *lh[eglnNR] = { "GLN", "QLN" };
148 const char *expl[eglnNR] = {
149 "Not protonated (charge 0)",
150 "Protonated (charge +1)"
153 return select_res(eglnNR,resnr,lh,expl,"GLUTAMINE",nrr,rr);
156 static const char *get_lystp(int resnr,int nrr,const rtprename_t *rr)
158 enum { elys, elysH, elysNR };
159 const char *lh[elysNR] = { "LYSN", "LYS" };
160 const char *expl[elysNR] = {
161 "Not protonated (charge 0)",
162 "Protonated (charge +1)"
165 return select_res(elysNR,resnr,lh,expl,"LYSINE",nrr,rr);
168 static const char *get_argtp(int resnr,int nrr,const rtprename_t *rr)
170 enum { earg, eargH, eargNR };
171 const char *lh[eargNR] = { "ARGN", "ARG" };
172 const char *expl[eargNR] = {
173 "Not protonated (charge 0)",
174 "Protonated (charge +1)"
177 return select_res(eargNR,resnr,lh,expl,"ARGININE",nrr,rr);
180 static const char *get_histp(int resnr,int nrr,const rtprename_t *rr)
182 const char *expl[ehisNR] = {
189 return select_res(ehisNR,resnr,hh,expl,"HISTIDINE",nrr,rr);
192 static void read_rtprename(const char *fname,FILE *fp,
193 int *nrtprename,rtprename_t **rtprename)
195 char line[STRLEN],buf[STRLEN];
204 while(get_a_line(fp,line,STRLEN)) {
206 nc = sscanf(line,"%s %s %s %s %s %s",
207 rr[n].gmx,rr[n].main,rr[n].nter,rr[n].cter,rr[n].bter,buf);
209 if (nc != 2 && nc != 5) {
210 gmx_fatal(FARGS,"Residue renaming database '%s' has %d columns instead of %d, %d or %d",fname,ncol,2,5);
213 } else if (nc != ncol) {
214 gmx_fatal(FARGS,"A line in residue renaming database '%s' has %d columns, while previous lines have %d columns",fname,nc,ncol);
218 /* This file does not have special termini names, copy them from main */
219 strcpy(rr[n].nter,rr[n].main);
220 strcpy(rr[n].cter,rr[n].main);
221 strcpy(rr[n].bter,rr[n].main);
231 static char *search_resrename(int nrr,rtprename_t *rr,
233 gmx_bool bStart,gmx_bool bEnd,
234 gmx_bool bCompareFFRTPname)
242 while (i<nrr && ((!bCompareFFRTPname && strcmp(name,rr[i].gmx) != 0) ||
243 ( bCompareFFRTPname && strcmp(name,rr[i].main) != 0)))
248 /* If found in the database, rename this residue's rtp building block,
249 * otherwise keep the old name.
271 gmx_fatal(FARGS,"In the chosen force field there is no residue type for '%s'%s",name,bStart ? " as a starting terminus" : (bEnd ? " as an ending terminus" : ""));
279 static void rename_resrtp(t_atoms *pdba,int nterpairs,int *r_start,int *r_end,
280 int nrr,rtprename_t *rr,t_symtab *symtab,
284 gmx_bool bStart,bEnd;
286 gmx_bool bFFRTPTERRNM;
288 bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
290 for(r=0; r<pdba->nres; r++)
294 for(j=0; j<nterpairs; j++)
301 for(j=0; j<nterpairs; j++)
309 nn = search_resrename(nrr,rr,*pdba->resinfo[r].rtp,bStart,bEnd,FALSE);
311 if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
313 /* This is a terminal residue, but the residue name,
314 * currently stored in .rtp, is not a standard residue name,
315 * but probably a force field specific rtp name.
316 * Check if we need to rename it because it is terminal.
318 nn = search_resrename(nrr,rr,
319 *pdba->resinfo[r].rtp,bStart,bEnd,TRUE);
322 if (nn != NULL && strcmp(*pdba->resinfo[r].rtp,nn) != 0)
326 printf("Changing rtp entry of residue %d %s to '%s'\n",
327 pdba->resinfo[r].nr,*pdba->resinfo[r].name,nn);
329 pdba->resinfo[r].rtp = put_symtab(symtab,nn);
334 static void pdbres_to_gmxrtp(t_atoms *pdba)
338 for(i=0; (i<pdba->nres); i++)
340 if (pdba->resinfo[i].rtp == NULL)
342 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
347 static void rename_pdbres(t_atoms *pdba,const char *oldnm,const char *newnm,
348 gmx_bool bFullCompare,t_symtab *symtab)
353 for(i=0; (i<pdba->nres); i++)
355 resnm = *pdba->resinfo[i].name;
356 if ((bFullCompare && (gmx_strcasecmp(resnm,oldnm) == 0)) ||
357 (!bFullCompare && strstr(resnm,oldnm) != NULL))
359 /* Rename the residue name (not the rtp name) */
360 pdba->resinfo[i].name = put_symtab(symtab,newnm);
365 static void rename_bb(t_atoms *pdba,const char *oldnm,const char *newnm,
366 gmx_bool bFullCompare,t_symtab *symtab)
371 for(i=0; (i<pdba->nres); i++)
373 /* We have not set the rtp name yes, use the residue name */
374 bbnm = *pdba->resinfo[i].name;
375 if ((bFullCompare && (gmx_strcasecmp(bbnm,oldnm) == 0)) ||
376 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
378 /* Change the rtp builing block name */
379 pdba->resinfo[i].rtp = put_symtab(symtab,newnm);
384 static void rename_bbint(t_atoms *pdba,const char *oldnm,
385 const char *gettp(int,int,const rtprename_t *),
386 gmx_bool bFullCompare,
388 int nrr,const rtprename_t *rr)
394 for(i=0; i<pdba->nres; i++)
396 /* We have not set the rtp name yes, use the residue name */
397 bbnm = *pdba->resinfo[i].name;
398 if ((bFullCompare && (strcmp(bbnm,oldnm) == 0)) ||
399 (!bFullCompare && strstr(bbnm,oldnm) != NULL))
401 ptr = gettp(i,nrr,rr);
402 pdba->resinfo[i].rtp = put_symtab(symtab,ptr);
407 static void check_occupancy(t_atoms *atoms,const char *filename,gmx_bool bVerbose)
413 ftp = fn2ftp(filename);
414 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
415 fprintf(stderr,"No occupancies in %s\n",filename);
417 for(i=0; (i<atoms->nr); i++) {
418 if (atoms->pdbinfo[i].occup != 1) {
420 fprintf(stderr,"Occupancy for atom %s%d-%s is %f rather than 1\n",
421 *atoms->resinfo[atoms->atom[i].resind].name,
422 atoms->resinfo[ atoms->atom[i].resind].nr,
424 atoms->pdbinfo[i].occup);
425 if (atoms->pdbinfo[i].occup == 0)
431 if (nzero == atoms->nr) {
432 fprintf(stderr,"All occupancy fields zero. This is probably not an X-Ray structure\n");
433 } else if ((nzero > 0) || (nnotone > 0)) {
436 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
437 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
439 nzero,nnotone,atoms->nr);
441 fprintf(stderr,"All occupancies are one\n");
446 void write_posres(char *fn,t_atoms *pdba,real fc)
451 fp=gmx_fio_fopen(fn,"w");
453 "; In this topology include file, you will find position restraint\n"
454 "; entries for all the heavy atoms in your original pdb file.\n"
455 "; This means that all the protons which were added by pdb2gmx are\n"
456 "; not restrained.\n"
458 "[ position_restraints ]\n"
459 "; %4s%6s%8s%8s%8s\n","atom","type","fx","fy","fz"
461 for(i=0; (i<pdba->nr); i++) {
462 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
463 fprintf(fp,"%6d%6d %g %g %g\n",i+1,1,fc,fc,fc);
468 static int read_pdball(const char *inf, const char *outf,char *title,
469 t_atoms *atoms, rvec **x,
470 int *ePBC,matrix box, gmx_bool bRemoveH,
471 t_symtab *symtab,gmx_residuetype_t rt,const char *watres,
472 gmx_atomprop_t aps,gmx_bool bVerbose)
473 /* Read a pdb file. (containing proteins) */
475 int natom,new_natom,i;
478 printf("Reading %s...\n",inf);
479 get_stx_coordnum(inf,&natom);
480 init_t_atoms(atoms,natom,TRUE);
482 read_stx_conf(inf,title,atoms,*x,NULL,ePBC,box);
483 if (fn2ftp(inf) == efPDB)
484 get_pdb_atomnumber(atoms,aps);
487 for(i=0; i<atoms->nr; i++)
488 if (!is_hydrogen(*atoms->atomname[i])) {
489 atoms->atom[new_natom]=atoms->atom[i];
490 atoms->atomname[new_natom]=atoms->atomname[i];
491 atoms->pdbinfo[new_natom]=atoms->pdbinfo[i];
492 copy_rvec((*x)[i],(*x)[new_natom]);
500 if (title && title[0])
501 printf(" '%s',",title);
502 printf(" %d atoms\n",natom);
504 /* Rename residues */
505 rename_pdbres(atoms,"HOH",watres,FALSE,symtab);
506 rename_pdbres(atoms,"SOL",watres,FALSE,symtab);
507 rename_pdbres(atoms,"WAT",watres,FALSE,symtab);
509 rename_atoms("xlateat.dat",NULL,
510 atoms,symtab,NULL,TRUE,rt,TRUE,bVerbose);
516 write_sto_conf(outf,title,atoms,*x,NULL,*ePBC,box);
521 void process_chain(t_atoms *pdba, rvec *x,
522 gmx_bool bTrpU,gmx_bool bPheU,gmx_bool bTyrU,
523 gmx_bool bLysMan,gmx_bool bAspMan,gmx_bool bGluMan,
524 gmx_bool bHisMan,gmx_bool bArgMan,gmx_bool bGlnMan,
525 real angle,real distance,t_symtab *symtab,
526 int nrr,const rtprename_t *rr)
528 /* Rename aromatics, lys, asp and histidine */
529 if (bTyrU) rename_bb(pdba,"TYR","TYRU",FALSE,symtab);
530 if (bTrpU) rename_bb(pdba,"TRP","TRPU",FALSE,symtab);
531 if (bPheU) rename_bb(pdba,"PHE","PHEU",FALSE,symtab);
533 rename_bbint(pdba,"LYS",get_lystp,FALSE,symtab,nrr,rr);
535 rename_bbint(pdba,"ARG",get_argtp,FALSE,symtab,nrr,rr);
537 rename_bbint(pdba,"GLN",get_glntp,FALSE,symtab,nrr,rr);
539 rename_bbint(pdba,"ASP",get_asptp,FALSE,symtab,nrr,rr);
541 rename_bb(pdba,"ASPH","ASP",FALSE,symtab);
543 rename_bbint(pdba,"GLU",get_glutp,FALSE,symtab,nrr,rr);
545 rename_bb(pdba,"GLUH","GLU",FALSE,symtab);
548 set_histp(pdba,x,angle,distance);
550 rename_bbint(pdba,"HIS",get_histp,TRUE,symtab,nrr,rr);
552 /* Initialize the rtp builing block names with the residue names
553 * for the residues that have not been processed above.
555 pdbres_to_gmxrtp(pdba);
557 /* Now we have all rtp names set.
558 * The rtp names will conform to Gromacs naming,
559 * unless the input pdb file contained one or more force field specific
560 * rtp names as residue names.
564 /* struct for sorting the atoms from the pdb file */
566 int resnr; /* residue number */
567 int j; /* database order index */
568 int index; /* original atom number */
569 char anm1; /* second letter of atom name */
570 char altloc; /* alternate location indicator */
573 int pdbicomp(const void *a,const void *b)
581 d = (pa->resnr - pb->resnr);
585 d = (pa->anm1 - pb->anm1);
587 d = (pa->altloc - pb->altloc);
594 static void sort_pdbatoms(int nrtp,t_restp restp[],t_hackblock hb[],
595 int natoms,t_atoms **pdbaptr,rvec **x,
596 t_blocka *block,char ***gnames)
598 t_atoms *pdba,*pdbnew;
613 for(i=0; i<natoms; i++)
615 atomnm = *pdba->atomname[i];
616 rptr = &restp[pdba->atom[i].resind];
617 for(j=0; (j<rptr->natom); j++)
619 if (gmx_strcasecmp(atomnm,*(rptr->atomname[j])) == 0)
629 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
630 "while sorting atoms.\n%s",atomnm,
631 *pdba->resinfo[pdba->atom[i].resind].name,
632 pdba->resinfo[pdba->atom[i].resind].nr,
635 is_hydrogen(atomnm) ?
636 "\nFor a hydrogen, this can be a different protonation state, or it\n"
637 "might have had a different number in the PDB file and was rebuilt\n"
638 "(it might for instance have been H3, and we only expected H1 & H2).\n"
639 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
640 "Remove this hydrogen or choose a different protonation state to solve it.\n"
641 "Option -ignh will ignore all hydrogens in the input." : ".");
642 gmx_fatal(FARGS,buf);
644 /* make shadow array to be sorted into indexgroup */
645 pdbi[i].resnr = pdba->atom[i].resind;
648 pdbi[i].anm1 = atomnm[1];
649 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
651 qsort(pdbi,natoms,(size_t)sizeof(pdbi[0]),pdbicomp);
653 /* pdba is sorted in pdbnew using the pdbi index */
656 init_t_atoms(pdbnew,natoms,TRUE);
659 pdbnew->nres=pdba->nres;
660 sfree(pdbnew->resinfo);
661 pdbnew->resinfo=pdba->resinfo;
662 for (i=0; i<natoms; i++) {
663 pdbnew->atom[i] = pdba->atom[pdbi[i].index];
664 pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
665 pdbnew->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
666 copy_rvec((*x)[pdbi[i].index],(*xnew)[i]);
667 /* make indexgroup in block */
671 sfree(pdba->atomname);
673 sfree(pdba->pdbinfo);
676 /* copy the sorted pdbnew back to pdba */
679 add_grp(block, gnames, natoms, a, "prot_sort");
685 static int remove_duplicate_atoms(t_atoms *pdba,rvec x[],gmx_bool bVerbose)
687 int i,j,oldnatoms,ndel;
690 printf("Checking for duplicate atoms....\n");
691 oldnatoms = pdba->nr;
693 /* NOTE: pdba->nr is modified inside the loop */
694 for(i=1; (i < pdba->nr); i++) {
695 /* compare 'i' and 'i-1', throw away 'i' if they are identical
696 this is a 'while' because multiple alternate locations can be present */
697 while ( (i < pdba->nr) &&
698 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
699 (strcmp(*pdba->atomname[i-1],*pdba->atomname[i])==0) ) {
702 ri = &pdba->resinfo[pdba->atom[i].resind];
703 printf("deleting duplicate atom %4s %s%4d%c",
704 *pdba->atomname[i],*ri->name,ri->nr,ri->ic);
705 if (ri->chainid && (ri->chainid != ' '))
706 printf(" ch %c", ri->chainid);
708 if (pdba->pdbinfo[i].atomnr)
709 printf(" pdb nr %4d",pdba->pdbinfo[i].atomnr);
710 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc!=' '))
711 printf(" altloc %c",pdba->pdbinfo[i].altloc);
716 /* We can not free, since it might be in the symtab */
717 /* sfree(pdba->atomname[i]); */
718 for (j=i; j < pdba->nr; j++) {
719 pdba->atom[j] = pdba->atom[j+1];
720 pdba->atomname[j] = pdba->atomname[j+1];
721 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
722 copy_rvec(x[j+1],x[j]);
724 srenew(pdba->atom, pdba->nr);
725 /* srenew(pdba->atomname, pdba->nr); */
726 srenew(pdba->pdbinfo, pdba->nr);
729 if (pdba->nr != oldnatoms)
730 printf("Now there are %d atoms. Deleted %d duplicates.\n",pdba->nr,ndel);
735 void find_nc_ter(t_atoms *pdba,int r0,int r1,int *r_start,int *r_end,gmx_residuetype_t rt)
738 const char *p_startrestype;
739 const char *p_restype;
740 int nstartwarn,nendwarn;
748 /* Find the starting terminus (typially N or 5') */
749 for(i=r0;i<r1 && *r_start==-1;i++)
751 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_startrestype);
752 if( !gmx_strcasecmp(p_startrestype,"Protein") || !gmx_strcasecmp(p_startrestype,"DNA") || !gmx_strcasecmp(p_startrestype,"RNA") )
754 printf("Identified residue %s%d as a starting terminus.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
761 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n",*pdba->resinfo[i].name,pdba->resinfo[i].nr);
765 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
773 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
774 for(i=*r_start;i<r1;i++)
776 gmx_residuetype_get_type(rt,*pdba->resinfo[i].name,&p_restype);
777 if( !gmx_strcasecmp(p_restype,p_startrestype) && nendwarn==0)
785 printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
786 *pdba->resinfo[i].name,pdba->resinfo[i].nr,p_restype,
787 *pdba->resinfo[*r_start].name,pdba->resinfo[*r_start].nr,p_startrestype);
791 printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
800 printf("Identified residue %s%d as a ending terminus.\n",*pdba->resinfo[*r_end].name,pdba->resinfo[*r_end].nr);
806 modify_chain_numbers(t_atoms * pdba,
807 const char * chainsep)
810 char old_prev_chainid;
811 char old_this_chainid;
812 int old_prev_chainnum;
813 int old_this_chainnum;
819 const char * prev_atomname;
820 const char * this_atomname;
821 const char * prev_resname;
822 const char * this_resname;
827 int prev_chainnumber;
828 int this_chainnumber;
840 splitting = SPLIT_TER_ONLY; /* keep compiler happy */
842 /* Be a bit flexible to catch typos */
843 if (!strncmp(chainsep,"id_o",4))
845 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
846 splitting = SPLIT_ID_OR_TER;
847 printf("Splitting chemical chains based on TER records or chain id changing.\n");
849 else if (!strncmp(chainsep,"int",3))
851 /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
852 splitting = SPLIT_INTERACTIVE;
853 printf("Splitting chemical chains interactively.\n");
855 else if (!strncmp(chainsep,"id_a",4))
857 splitting = SPLIT_ID_AND_TER;
858 printf("Splitting chemical chains based on TER records and chain id changing.\n");
860 else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
862 splitting = SPLIT_ID_ONLY;
863 printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
865 else if (chainsep[0]=='t')
867 splitting = SPLIT_TER_ONLY;
868 printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
872 gmx_fatal(FARGS,"Unidentified setting for chain separation: %s\n",chainsep);
875 /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
877 old_prev_chainid = '?';
878 old_prev_chainnum = -1;
881 this_atomname = NULL;
886 this_chainnumber = -1;
888 for(i=0;i<pdba->nres;i++)
890 ri = &pdba->resinfo[i];
891 old_this_chainid = ri->chainid;
892 old_this_chainnum = ri->chainnum;
894 prev_atomname = this_atomname;
895 prev_atomnum = this_atomnum;
896 prev_resname = this_resname;
897 prev_resnum = this_resnum;
898 prev_chainid = this_chainid;
899 prev_chainnumber = this_chainnumber;
901 this_atomname = *(pdba->atomname[i]);
902 this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
903 this_resname = *ri->name;
904 this_resnum = ri->nr;
905 this_chainid = ri->chainid;
906 this_chainnumber = ri->chainnum;
910 case SPLIT_ID_OR_TER:
911 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
917 case SPLIT_ID_AND_TER:
918 if(old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
925 if(old_this_chainid != old_prev_chainid)
932 if(old_this_chainnum != old_prev_chainnum)
937 case SPLIT_INTERACTIVE:
938 if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
942 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
943 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
944 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
945 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
947 if(NULL==fgets(select,STRLEN-1,stdin))
949 gmx_fatal(FARGS,"Error reading from stdin");
952 if(i==0 || select[0] == 'y')
959 gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
962 old_prev_chainid = old_this_chainid;
963 old_prev_chainnum = old_this_chainnum;
965 ri->chainnum = new_chainnum;
994 int main(int argc, char *argv[])
996 const char *desc[] = {
997 "This program reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
998 "some database files, adds hydrogens to the molecules and generates",
999 "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1000 "and a topology in GROMACS format.",
1001 "These files can subsequently be processed to generate a run input file.",
1003 "[TT]pdb2gmx[tt] will search for force fields by looking for",
1004 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1005 "of the current working directory and of the GROMACS library directory",
1006 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1008 "By default the forcefield selection is interactive,",
1009 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1010 "in the list on the command line instead. In that case [TT]pdb2gmx[tt] just looks",
1011 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1013 "After choosing a force field, all files will be read only from",
1014 "the corresponding force field directory.",
1015 "If you want to modify or add a residue types, you can copy the force",
1016 "field directory from the GROMACS library directory to your current",
1017 "working directory. If you want to add new protein residue types,",
1018 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1019 "or copy the whole library directory to a local directory and set",
1020 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1021 "Check Chapter 5 of the manual for more information about file formats.",
1024 "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1025 "need not necessarily contain a protein structure. Every kind of",
1026 "molecule for which there is support in the database can be converted.",
1027 "If there is no support in the database, you can add it yourself.[PAR]",
1029 "The program has limited intelligence, it reads a number of database",
1030 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1031 "if necessary this can be done manually. The program can prompt the",
1032 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1033 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1034 "protonated (three protons, default), for Asp and Glu unprotonated",
1035 "(default) or protonated, for His the proton can be either on ND1,",
1036 "on NE2 or on both. By default these selections are done automatically.",
1037 "For His, this is based on an optimal hydrogen bonding",
1038 "conformation. Hydrogen bonds are defined based on a simple geometric",
1039 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1040 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1041 "[TT]-dist[tt] respectively.[PAR]",
1043 "The protonation state of N- and C-termini can be chosen interactively",
1044 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1045 "respectively. Some force fields support zwitterionic forms for chains of",
1046 "one residue, but for polypeptides these options should NOT be selected.",
1047 "The AMBER force fields have unique forms for the terminal residues,",
1048 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1049 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1050 "respectively to use these forms, making sure you preserve the format",
1051 "of the coordinate file. Alternatively, use named terminating residues",
1052 "(e.g. ACE, NME).[PAR]",
1054 "The separation of chains is not entirely trivial since the markup",
1055 "in user-generated PDB files frequently varies and sometimes it",
1056 "is desirable to merge entries across a TER record, for instance",
1057 "if you want a disulfide bridge or distance restraints between",
1058 "two protein chains or if you have a HEME group bound to a protein.",
1059 "In such cases multiple chains should be contained in a single",
1060 "[TT]moleculetype[tt] definition.",
1061 "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
1062 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1063 "start, and termini added when applicable. This can be done based on the",
1064 "existence of TER records, when the chain id changes, or combinations of either",
1065 "or both of these. You can also do the selection fully interactively.",
1066 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1067 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1068 "This can be turned off (no merging), all non-water chains can be merged into a",
1069 "single molecule, or the selection can be done interactively.[PAR]",
1071 "[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
1072 "If any of the occupancies are not one, indicating that the atom is",
1073 "not resolved well in the structure, a warning message is issued.",
1074 "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1075 "all occupancy fields may be zero. Either way, it is up to the user",
1076 "to verify the correctness of the input data (read the article!).[PAR]",
1078 "During processing the atoms will be reordered according to GROMACS",
1079 "conventions. With [TT]-n[tt] an index file can be generated that",
1080 "contains one group reordered in the same way. This allows you to",
1081 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1082 "one limitation: reordering is done after the hydrogens are stripped",
1083 "from the input and before new hydrogens are added. This means that",
1084 "you should not use [TT]-ignh[tt].[PAR]",
1086 "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1087 "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1088 "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1091 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1092 "motions. Angular and out-of-plane motions can be removed by changing",
1093 "hydrogens into virtual sites and fixing angles, which fixes their",
1094 "position relative to neighboring atoms. Additionally, all atoms in the",
1095 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1096 "can be converted into virtual sites, eliminating the fast improper dihedral",
1097 "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1098 "atoms are also converted to virtual sites. The mass of all atoms that are",
1099 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1100 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1101 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1102 "done for water hydrogens to slow down the rotational motion of water.",
1103 "The increase in mass of the hydrogens is subtracted from the bonded",
1104 "(heavy) atom so that the total mass of the system remains the same."
1108 FILE *fp,*top_file,*top_file2,*itp_file=NULL;
1110 t_atoms pdba_all,*pdba;
1114 int chain,nch,maxch,nwaterchain;
1116 t_chain *chains,*cc;
1117 char select[STRLEN];
1130 gpp_atomtype_t atype;
1131 gmx_residuetype_t rt;
1133 char fn[256],itp_fn[STRLEN],posre_fn[STRLEN],buf_fn[STRLEN];
1134 char molname[STRLEN],title[STRLEN],quote[STRLEN],generator[STRLEN];
1135 char *c,forcefield[STRLEN],ffdir[STRLEN];
1136 char ffname[STRLEN],suffix[STRLEN],buf[STRLEN];
1145 rtprename_t *rtprename=NULL;
1146 int nah,nNtdb,nCtdb,ntdblist;
1147 t_hackblock *ntdb,*ctdb,**tdblist;
1151 gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
1153 t_hackblock *hb_chain;
1154 t_restp *restp_chain;
1156 const char *p_restype;
1160 const char * prev_atomname;
1161 const char * this_atomname;
1162 const char * prev_resname;
1163 const char * this_resname;
1168 int prev_chainnumber;
1169 int this_chainnumber;
1171 int this_chainstart;
1172 int prev_chainstart;
1179 { efSTX, "-f", "eiwit.pdb", ffREAD },
1180 { efSTO, "-o", "conf", ffWRITE },
1181 { efTOP, NULL, NULL, ffWRITE },
1182 { efITP, "-i", "posre", ffWRITE },
1183 { efNDX, "-n", "clean", ffOPTWR },
1184 { efSTO, "-q", "clean.pdb", ffOPTWR }
1186 #define NFILE asize(fnm)
1189 /* Command line arguments must be static */
1190 static gmx_bool bNewRTP=FALSE;
1191 static gmx_bool bInter=FALSE, bCysMan=FALSE;
1192 static gmx_bool bLysMan=FALSE, bAspMan=FALSE, bGluMan=FALSE, bHisMan=FALSE;
1193 static gmx_bool bGlnMan=FALSE, bArgMan=FALSE;
1194 static gmx_bool bTerMan=FALSE, bUnA=FALSE, bHeavyH;
1195 static gmx_bool bSort=TRUE, bAllowMissing=FALSE, bRemoveH=FALSE;
1196 static gmx_bool bDeuterate=FALSE,bVerbose=FALSE,bChargeGroups=TRUE,bCmap=TRUE;
1197 static gmx_bool bRenumRes=FALSE,bRTPresname=FALSE;
1198 static real angle=135.0, distance=0.3,posre_fc=1000;
1199 static real long_bond_dist=0.25, short_bond_dist=0.05;
1200 static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
1201 static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1202 static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1203 static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
1204 static const char *ff = "select";
1207 { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1208 "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1209 { "-lb", FALSE, etREAL, {&long_bond_dist},
1210 "HIDDENLong bond warning distance" },
1211 { "-sb", FALSE, etREAL, {&short_bond_dist},
1212 "HIDDENShort bond warning distance" },
1213 { "-chainsep", FALSE, etENUM, {chainsep},
1214 "Condition in PDB files when a new chain should be started (adding termini)" },
1215 { "-merge", FALSE, etENUM, {&merge},
1216 "Merge multiple chains into a single [moleculetype]" },
1217 { "-ff", FALSE, etSTR, {&ff},
1218 "Force field, interactive by default. Use [TT]-h[tt] for information." },
1219 { "-water", FALSE, etENUM, {watstr},
1220 "Water model to use" },
1221 { "-inter", FALSE, etBOOL, {&bInter},
1222 "Set the next 8 options to interactive"},
1223 { "-ss", FALSE, etBOOL, {&bCysMan},
1224 "Interactive SS bridge selection" },
1225 { "-ter", FALSE, etBOOL, {&bTerMan},
1226 "Interactive termini selection, instead of charged (default)" },
1227 { "-lys", FALSE, etBOOL, {&bLysMan},
1228 "Interactive lysine selection, instead of charged" },
1229 { "-arg", FALSE, etBOOL, {&bArgMan},
1230 "Interactive arginine selection, instead of charged" },
1231 { "-asp", FALSE, etBOOL, {&bAspMan},
1232 "Interactive aspartic acid selection, instead of charged" },
1233 { "-glu", FALSE, etBOOL, {&bGluMan},
1234 "Interactive glutamic acid selection, instead of charged" },
1235 { "-gln", FALSE, etBOOL, {&bGlnMan},
1236 "Interactive glutamine selection, instead of neutral" },
1237 { "-his", FALSE, etBOOL, {&bHisMan},
1238 "Interactive histidine selection, instead of checking H-bonds" },
1239 { "-angle", FALSE, etREAL, {&angle},
1240 "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1241 { "-dist", FALSE, etREAL, {&distance},
1242 "Maximum donor-acceptor distance for a H-bond (nm)" },
1243 { "-una", FALSE, etBOOL, {&bUnA},
1244 "Select aromatic rings with united CH atoms on phenylalanine, "
1245 "tryptophane and tyrosine" },
1246 { "-sort", FALSE, etBOOL, {&bSort},
1247 "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1248 { "-ignh", FALSE, etBOOL, {&bRemoveH},
1249 "Ignore hydrogen atoms that are in the coordinate file" },
1250 { "-missing",FALSE, etBOOL, {&bAllowMissing},
1251 "Continue when atoms are missing, dangerous" },
1252 { "-v", FALSE, etBOOL, {&bVerbose},
1253 "Be slightly more verbose in messages" },
1254 { "-posrefc",FALSE, etREAL, {&posre_fc},
1255 "Force constant for position restraints" },
1256 { "-vsite", FALSE, etENUM, {vsitestr},
1257 "Convert atoms to virtual sites" },
1258 { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1259 "Make hydrogen atoms heavy" },
1260 { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1261 "Change the mass of hydrogens to 2 amu" },
1262 { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1263 "Use charge groups in the [TT].rtp[tt] file" },
1264 { "-cmap", TRUE, etBOOL, {&bCmap},
1265 "Use cmap torsions (if enabled in the [TT].rtp[tt] file)" },
1266 { "-renum", TRUE, etBOOL, {&bRenumRes},
1267 "Renumber the residues consecutively in the output" },
1268 { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1269 "Use [TT].rtp[tt] entry names as residue names" }
1271 #define NPARGS asize(pa)
1273 CopyRight(stderr,argv[0]);
1274 parse_common_args(&argc,argv,0,NFILE,fnm,asize(pa),pa,asize(desc),desc,
1277 /* Force field selection, interactive or direct */
1278 choose_ff(strcmp(ff,"select") == 0 ? NULL : ff,
1279 forcefield,sizeof(forcefield),
1280 ffdir,sizeof(ffdir));
1282 if (strlen(forcefield) > 0) {
1283 strcpy(ffname,forcefield);
1284 ffname[0] = toupper(ffname[0]);
1286 gmx_fatal(FARGS,"Empty forcefield string");
1289 printf("\nUsing the %s force field in directory %s\n\n",
1292 choose_watermodel(watstr[0],ffdir,&watermodel);
1295 /* if anything changes here, also change description of -inter */
1308 else if (bDeuterate)
1313 switch(vsitestr[0][0]) {
1314 case 'n': /* none */
1316 bVsiteAromatics=FALSE;
1318 case 'h': /* hydrogens */
1320 bVsiteAromatics=FALSE;
1322 case 'a': /* aromatics */
1324 bVsiteAromatics=TRUE;
1327 gmx_fatal(FARGS,"DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1328 __FILE__,__LINE__,vsitestr[0]);
1331 /* Open the symbol table */
1332 open_symtab(&symtab);
1334 /* Residue type database */
1335 gmx_residuetype_init(&rt);
1337 /* Read residue renaming database(s), if present */
1338 nrrn = fflib_search_file_end(ffdir,".r2b",FALSE,&rrn);
1342 for(i=0; i<nrrn; i++) {
1343 fp = fflib_open(rrn[i]);
1344 read_rtprename(rrn[i],fp,&nrtprename,&rtprename);
1350 /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1352 for(i=0;i<nrtprename;i++)
1354 rc=gmx_residuetype_get_type(rt,rtprename[i].gmx,&p_restype);
1356 /* Only add names if the 'standard' gromacs/iupac base name was found */
1359 gmx_residuetype_add(rt,rtprename[i].main,p_restype);
1360 gmx_residuetype_add(rt,rtprename[i].nter,p_restype);
1361 gmx_residuetype_add(rt,rtprename[i].cter,p_restype);
1362 gmx_residuetype_add(rt,rtprename[i].bter,p_restype);
1367 if (watermodel != NULL && (strstr(watermodel,"4p") ||
1368 strstr(watermodel,"4P"))) {
1370 } else if (watermodel != NULL && (strstr(watermodel,"5p") ||
1371 strstr(watermodel,"5P"))) {
1377 aps = gmx_atomprop_init();
1378 natom = read_pdball(opt2fn("-f",NFILE,fnm),opt2fn_null("-q",NFILE,fnm),title,
1379 &pdba_all,&pdbx,&ePBC,box,bRemoveH,&symtab,rt,watres,
1383 gmx_fatal(FARGS,"No atoms found in pdb file %s\n",opt2fn("-f",NFILE,fnm));
1385 printf("Analyzing pdb file\n");
1390 modify_chain_numbers(&pdba_all,chainsep[0]);
1394 this_atomname = NULL;
1396 this_resname = NULL;
1399 this_chainnumber = -1;
1400 this_chainstart = 0;
1401 /* Keep the compiler happy */
1402 prev_chainstart = 0;
1407 for (i=0; (i<natom); i++)
1409 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1411 prev_atomname = this_atomname;
1412 prev_atomnum = this_atomnum;
1413 prev_resname = this_resname;
1414 prev_resnum = this_resnum;
1415 prev_chainid = this_chainid;
1416 prev_chainnumber = this_chainnumber;
1419 prev_chainstart = this_chainstart;
1422 this_atomname = *pdba_all.atomname[i];
1423 this_atomnum = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1424 this_resname = *ri->name;
1425 this_resnum = ri->nr;
1426 this_chainid = ri->chainid;
1427 this_chainnumber = ri->chainnum;
1429 bWat = gmx_strcasecmp(*ri->name,watres) == 0;
1430 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1432 this_chainstart = pdba_all.atom[i].resind;
1437 if(!strncmp(merge[0],"int",3))
1439 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1440 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1441 prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
1442 this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
1444 if(NULL==fgets(select,STRLEN-1,stdin))
1446 gmx_fatal(FARGS,"Error reading from stdin");
1448 bMerged = (select[0] == 'y');
1450 else if(!strncmp(merge[0],"all",3))
1458 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1459 pdba_all.atom[i].resind - prev_chainstart;
1460 pdb_ch[nch-1].nterpairs++;
1461 srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
1466 /* set natom for previous chain */
1469 pdb_ch[nch-1].natom=i-pdb_ch[nch-1].start;
1476 /* check if chain identifier was used before */
1477 for (j=0; (j<nch); j++)
1479 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1481 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1482 "They will be treated as separate chains unless you reorder your file.\n",
1489 srenew(pdb_ch,maxch);
1491 pdb_ch[nch].chainid = ri->chainid;
1492 pdb_ch[nch].chainnum = ri->chainnum;
1493 pdb_ch[nch].start=i;
1494 pdb_ch[nch].bAllWat=bWat;
1496 pdb_ch[nch].nterpairs=0;
1498 pdb_ch[nch].nterpairs=1;
1499 snew(pdb_ch[nch].chainstart,pdb_ch[nch].nterpairs+1);
1500 /* modified [nch] to [0] below */
1501 pdb_ch[nch].chainstart[0]=0;
1507 pdb_ch[nch-1].natom=natom-pdb_ch[nch-1].start;
1509 /* set all the water blocks at the end of the chain */
1510 snew(swap_index,nch);
1512 for(i=0; i<nch; i++)
1513 if (!pdb_ch[i].bAllWat) {
1517 for(i=0; i<nch; i++)
1518 if (pdb_ch[i].bAllWat) {
1523 printf("Moved all the water blocks to the end\n");
1526 /* copy pdb data and x for all chains */
1527 for (i=0; (i<nch); i++) {
1529 chains[i].chainid = pdb_ch[si].chainid;
1530 chains[i].chainnum = pdb_ch[si].chainnum;
1531 chains[i].bAllWat = pdb_ch[si].bAllWat;
1532 chains[i].nterpairs = pdb_ch[si].nterpairs;
1533 chains[i].chainstart = pdb_ch[si].chainstart;
1534 snew(chains[i].ntdb,pdb_ch[si].nterpairs);
1535 snew(chains[i].ctdb,pdb_ch[si].nterpairs);
1536 snew(chains[i].r_start,pdb_ch[si].nterpairs);
1537 snew(chains[i].r_end,pdb_ch[si].nterpairs);
1539 snew(chains[i].pdba,1);
1540 init_t_atoms(chains[i].pdba,pdb_ch[si].natom,TRUE);
1541 snew(chains[i].x,chains[i].pdba->nr);
1542 for (j=0; j<chains[i].pdba->nr; j++) {
1543 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1544 snew(chains[i].pdba->atomname[j],1);
1545 *chains[i].pdba->atomname[j] =
1546 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1547 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1548 copy_rvec(pdbx[pdb_ch[si].start+j],chains[i].x[j]);
1550 /* Re-index the residues assuming that the indices are continuous */
1551 k = chains[i].pdba->atom[0].resind;
1552 nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1553 chains[i].pdba->nres = nres;
1554 for(j=0; j < chains[i].pdba->nr; j++) {
1555 chains[i].pdba->atom[j].resind -= k;
1557 srenew(chains[i].pdba->resinfo,nres);
1558 for(j=0; j<nres; j++) {
1559 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1560 snew(chains[i].pdba->resinfo[j].name,1);
1561 *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1562 /* make all chain identifiers equal to that of the chain */
1563 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1568 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1571 printf("There are %d chains and %d blocks of water and "
1572 "%d residues with %d atoms\n",
1573 nch-nwaterchain,nwaterchain,
1574 pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr,natom);
1576 printf("\n %5s %4s %6s\n","chain","#res","#atoms");
1577 for (i=0; (i<nch); i++)
1578 printf(" %d '%c' %5d %6d %s\n",
1579 i+1, chains[i].chainid ? chains[i].chainid:'-',
1580 chains[i].pdba->nres, chains[i].pdba->nr,
1581 chains[i].bAllWat ? "(only water)":"");
1584 check_occupancy(&pdba_all,opt2fn("-f",NFILE,fnm),bVerbose);
1586 /* Read atomtypes... */
1587 atype = read_atype(ffdir,&symtab);
1589 /* read residue database */
1590 printf("Reading residue database... (%s)\n",forcefield);
1591 nrtpf = fflib_search_file_end(ffdir,".rtp",TRUE,&rtpf);
1594 for(i=0; i<nrtpf; i++) {
1595 read_resall(rtpf[i],&nrtp,&restp,atype,&symtab,FALSE);
1600 /* Not correct with multiple rtp input files with different bonded types */
1601 fp=gmx_fio_fopen("new.rtp","w");
1602 print_resall(fp,nrtp,restp,atype);
1606 /* read hydrogen database */
1607 nah = read_h_db(ffdir,&ah);
1609 /* Read Termini database... */
1610 nNtdb=read_ter_db(ffdir,'n',&ntdb,atype);
1611 nCtdb=read_ter_db(ffdir,'c',&ctdb,atype);
1613 top_fn=ftp2fn(efTOP,NFILE,fnm);
1614 top_file=gmx_fio_fopen(top_fn,"w");
1616 sprintf(generator,"%s - %s",ShortProgram(), GromacsVersion() );
1618 print_top_header(top_file,top_fn,generator,FALSE,ffdir,mHmult);
1625 for(chain=0; (chain<nch); chain++) {
1626 cc = &(chains[chain]);
1628 /* set pdba, natom and nres to the current chain */
1632 nres =cc->pdba->nres;
1634 if (cc->chainid && ( cc->chainid != ' ' ) )
1635 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1636 chain+1,cc->chainid,natom,nres);
1638 printf("Processing chain %d (%d atoms, %d residues)\n",
1639 chain+1,natom,nres);
1641 process_chain(pdba,x,bUnA,bUnA,bUnA,bLysMan,bAspMan,bGluMan,
1642 bHisMan,bArgMan,bGlnMan,angle,distance,&symtab,
1643 nrtprename,rtprename);
1645 cc->chainstart[cc->nterpairs] = pdba->nres;
1647 for(i=0; i<cc->nterpairs; i++)
1649 find_nc_ter(pdba,cc->chainstart[i],cc->chainstart[i+1],
1650 &(cc->r_start[j]),&(cc->r_end[j]),rt);
1652 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1658 if (cc->nterpairs == 0)
1660 printf("Problem with chain definition, or missing terminal residues.\n"
1661 "This chain does not appear to contain a recognized chain molecule.\n"
1662 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1665 /* Check for disulfides and other special bonds */
1666 nssbonds = mk_specbonds(pdba,x,bCysMan,&ssbonds,bVerbose);
1668 if (nrtprename > 0) {
1669 rename_resrtp(pdba,cc->nterpairs,cc->r_start,cc->r_end,nrtprename,rtprename,
1675 sprintf(fn,"chain.pdb");
1677 sprintf(fn,"chain_%c%d.pdb",cc->chainid,cc->chainnum);
1679 write_sto_conf(fn,title,pdba,x,NULL,ePBC,box);
1683 for(i=0; i<cc->nterpairs; i++)
1687 * We first apply a filter so we only have the
1688 * termini that can be applied to the residue in question
1689 * (or a generic terminus if no-residue specific is available).
1691 /* First the N terminus */
1694 tdblist = filter_ter(nrtp,restp,nNtdb,ntdb,
1695 *pdba->resinfo[cc->r_start[i]].name,
1696 *pdba->resinfo[cc->r_start[i]].rtp,
1700 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1701 "is already in a terminus-specific form and skipping terminus selection.\n");
1706 if(bTerMan && ntdblist>1)
1708 sprintf(select,"Select start terminus type for %s-%d",
1709 *pdba->resinfo[cc->r_start[i]].name,
1710 pdba->resinfo[cc->r_start[i]].nr);
1711 cc->ntdb[i] = choose_ter(ntdblist,tdblist,select);
1715 cc->ntdb[i] = tdblist[0];
1718 printf("Start terminus %s-%d: %s\n",
1719 *pdba->resinfo[cc->r_start[i]].name,
1720 pdba->resinfo[cc->r_start[i]].nr,
1721 (cc->ntdb[i])->name);
1730 /* And the C terminus */
1733 tdblist = filter_ter(nrtp,restp,nCtdb,ctdb,
1734 *pdba->resinfo[cc->r_end[i]].name,
1735 *pdba->resinfo[cc->r_end[i]].rtp,
1739 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1740 "is already in a terminus-specific form and skipping terminus selection.\n");
1745 if(bTerMan && ntdblist>1)
1747 sprintf(select,"Select end terminus type for %s-%d",
1748 *pdba->resinfo[cc->r_end[i]].name,
1749 pdba->resinfo[cc->r_end[i]].nr);
1750 cc->ctdb[i] = choose_ter(ntdblist,tdblist,select);
1754 cc->ctdb[i] = tdblist[0];
1756 printf("End terminus %s-%d: %s\n",
1757 *pdba->resinfo[cc->r_end[i]].name,
1758 pdba->resinfo[cc->r_end[i]].nr,
1759 (cc->ctdb[i])->name);
1768 /* lookup hackblocks and rtp for all residues */
1769 get_hackblocks_rtp(&hb_chain, &restp_chain,
1770 nrtp, restp, pdba->nres, pdba->resinfo,
1771 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1772 /* ideally, now we would not need the rtp itself anymore, but do
1773 everything using the hb and restp arrays. Unfortunately, that
1774 requires some re-thinking of code in gen_vsite.c, which I won't
1775 do now :( AF 26-7-99 */
1777 rename_atoms(NULL,ffdir,
1778 pdba,&symtab,restp_chain,FALSE,rt,FALSE,bVerbose);
1780 match_atomnames_with_rtp(restp_chain,hb_chain,pdba,x,bVerbose);
1783 block = new_blocka();
1785 sort_pdbatoms(pdba->nres,restp_chain,hb_chain,
1786 natom,&pdba,&x,block,&gnames);
1787 natom = remove_duplicate_atoms(pdba,x,bVerbose);
1788 if (ftp2bSet(efNDX,NFILE,fnm)) {
1790 fprintf(stderr,"WARNING: with the -remh option the generated "
1791 "index file (%s) might be useless\n"
1792 "(the index file is generated before hydrogens are added)",
1793 ftp2fn(efNDX,NFILE,fnm));
1795 write_index(ftp2fn(efNDX,NFILE,fnm),block,gnames);
1797 for(i=0; i < block->nr; i++)
1802 fprintf(stderr,"WARNING: "
1803 "without sorting no check for duplicate atoms can be done\n");
1806 /* Generate Hydrogen atoms (and termini) in the sequence */
1807 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1808 natom=add_h(&pdba,&x,nah,ah,
1809 cc->nterpairs,cc->ntdb,cc->ctdb,cc->r_start,cc->r_end,bAllowMissing,
1810 NULL,NULL,TRUE,FALSE);
1811 printf("Now there are %d residues with %d atoms\n",
1812 pdba->nres,pdba->nr);
1813 if (debug) write_pdbfile(debug,title,pdba,x,ePBC,box,' ',0,NULL,TRUE);
1816 for(i=0; (i<natom); i++)
1817 fprintf(debug,"Res %s%d atom %d %s\n",
1818 *(pdba->resinfo[pdba->atom[i].resind].name),
1819 pdba->resinfo[pdba->atom[i].resind].nr,i+1,*pdba->atomname[i]);
1821 strcpy(posre_fn,ftp2fn(efITP,NFILE,fnm));
1823 /* make up molecule name(s) */
1825 k = (cc->nterpairs>0 && cc->r_start[0]>=0) ? cc->r_start[0] : 0;
1827 gmx_residuetype_get_type(rt,*pdba->resinfo[k].name,&p_restype);
1833 sprintf(molname,"Water");
1837 this_chainid = cc->chainid;
1839 /* Add the chain id if we have one */
1840 if(this_chainid != ' ')
1842 sprintf(buf,"_chain_%c",this_chainid);
1846 /* Check if there have been previous chains with the same id */
1848 for(k=0;k<chain;k++)
1850 if(cc->chainid == chains[k].chainid)
1855 /* Add the number for this chain identifier if there are multiple copies */
1859 sprintf(buf,"%d",nid_used+1);
1863 if(strlen(suffix)>0)
1865 sprintf(molname,"%s%s",p_restype,suffix);
1869 strcpy(molname,p_restype);
1873 if ((nch-nwaterchain>1) && !cc->bAllWat) {
1875 strcpy(itp_fn,top_fn);
1876 printf("Chain time...\n");
1877 c=strrchr(itp_fn,'.');
1878 sprintf(c,"_%s.itp",molname);
1879 c=strrchr(posre_fn,'.');
1880 sprintf(c,"_%s.itp",molname);
1881 if (strcmp(itp_fn,posre_fn) == 0) {
1882 strcpy(buf_fn,posre_fn);
1883 c = strrchr(buf_fn,'.');
1885 sprintf(posre_fn,"%s_pr.itp",buf_fn);
1889 srenew(incls,nincl);
1890 incls[nincl-1]=strdup(itp_fn);
1891 itp_file=gmx_fio_fopen(itp_fn,"w");
1895 srenew(mols,nmol+1);
1897 mols[nmol].name = strdup("SOL");
1898 mols[nmol].nr = pdba->nres;
1900 mols[nmol].name = strdup(molname);
1906 print_top_comment(itp_file,itp_fn,generator,ffdir,TRUE);
1916 pdb2top(top_file2,posre_fn,molname,pdba,&x,atype,&symtab,
1918 restp_chain,hb_chain,
1919 cc->nterpairs,cc->ntdb,cc->ctdb,bAllowMissing,
1920 bVsites,bVsiteAromatics,forcefield,ffdir,
1921 mHmult,nssbonds,ssbonds,
1922 long_bond_dist,short_bond_dist,bDeuterate,bChargeGroups,bCmap,
1923 bRenumRes,bRTPresname);
1926 write_posres(posre_fn,pdba,posre_fc);
1929 gmx_fio_fclose(itp_file);
1931 /* pdba and natom have been reassigned somewhere so: */
1936 if (cc->chainid == ' ')
1937 sprintf(fn,"chain.pdb");
1939 sprintf(fn,"chain_%c.pdb",cc->chainid);
1940 cool_quote(quote,255,NULL);
1941 write_sto_conf(fn,quote,pdba,x,NULL,ePBC,box);
1945 if (watermodel == NULL) {
1946 for(chain=0; chain<nch; chain++) {
1947 if (chains[chain].bAllWat) {
1948 gmx_fatal(FARGS,"You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
1952 sprintf(buf_fn,"%s%c%s.itp",ffdir,DIR_SEPARATOR,watermodel);
1953 if (!fflib_fexist(buf_fn)) {
1954 gmx_fatal(FARGS,"The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
1959 print_top_mols(top_file,title,ffdir,watermodel,nincl,incls,nmol,mols);
1960 gmx_fio_fclose(top_file);
1962 gmx_residuetype_destroy(rt);
1964 /* now merge all chains back together */
1967 for (i=0; (i<nch); i++) {
1968 natom+=chains[i].pdba->nr;
1969 nres+=chains[i].pdba->nres;
1972 init_t_atoms(atoms,natom,FALSE);
1973 for(i=0; i < atoms->nres; i++)
1974 sfree(atoms->resinfo[i].name);
1975 sfree(atoms->resinfo);
1977 snew(atoms->resinfo,nres);
1981 for (i=0; (i<nch); i++) {
1983 printf("Including chain %d in system: %d atoms %d residues\n",
1984 i+1,chains[i].pdba->nr,chains[i].pdba->nres);
1985 for (j=0; (j<chains[i].pdba->nr); j++) {
1986 atoms->atom[k]=chains[i].pdba->atom[j];
1987 atoms->atom[k].resind += l; /* l is processed nr of residues */
1988 atoms->atomname[k]=chains[i].pdba->atomname[j];
1989 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
1990 copy_rvec(chains[i].x[j],x[k]);
1993 for (j=0; (j<chains[i].pdba->nres); j++) {
1994 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
1996 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2003 fprintf(stderr,"Now there are %d atoms and %d residues\n",k,l);
2004 print_sums(atoms, TRUE);
2007 fprintf(stderr,"\nWriting coordinate file...\n");
2008 clear_rvec(box_space);
2010 gen_box(0,atoms->nr,x,box,box_space,FALSE);
2011 write_sto_conf(ftp2fn(efSTO,NFILE,fnm),title,atoms,x,NULL,ePBC,box);
2013 printf("\t\t--------- PLEASE NOTE ------------\n");
2014 printf("You have successfully generated a topology from: %s.\n",
2015 opt2fn("-f",NFILE,fnm));
2016 if (watermodel != NULL) {
2017 printf("The %s force field and the %s water model are used.\n",
2020 printf("The %s force field is used.\n",
2023 printf("\t\t--------- ETON ESAELP ------------\n");