int old_prev_chainnum;
int old_this_chainnum;
t_resinfo *ri;
+ char select[STRLEN];
int new_chainnum;
-
+ int this_atomnum;
+ int prev_atomnum;
+ const char * prev_atomname;
+ const char * this_atomname;
+ const char * prev_resname;
+ const char * this_resname;
+ int prev_resnum;
+ int this_resnum;
+ char prev_chainid;
+ char this_chainid;
+ int prev_chainnumber;
+ int this_chainnumber;
+
enum
{
SPLIT_ID_OR_TER,
SPLIT_ID_AND_TER,
SPLIT_ID_ONLY,
- SPLIT_TER_ONLY
+ SPLIT_TER_ONLY,
+ SPLIT_INTERACTIVE
}
splitting;
- printf("Splitting PDB chains based on ");
splitting = SPLIT_TER_ONLY; /* keep compiler happy */
/* Be a bit flexible to catch typos */
- if (!strncmp(chainsep,"id_o",4) || !strncmp(chainsep,"int",3))
+ if (!strncmp(chainsep,"id_o",4))
{
/* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
splitting = SPLIT_ID_OR_TER;
- printf("TER records or changing chain id.\n");
+ printf("Splitting chemical chains based on TER records or chain id changing.\n");
+ }
+ else if (!strncmp(chainsep,"int",3))
+ {
+ /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
+ splitting = SPLIT_INTERACTIVE;
+ printf("Splitting chemical chains interactively.\n");
}
else if (!strncmp(chainsep,"id_a",4))
{
splitting = SPLIT_ID_AND_TER;
- printf("TER records and chain id.\n");
+ printf("Splitting chemical chains based on TER records and chain id changing.\n");
}
else if (strlen(chainsep)==2 && !strncmp(chainsep,"id",4))
{
splitting = SPLIT_ID_ONLY;
- printf("changing chain id only (ignoring TER records).\n");
+ printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
}
else if (chainsep[0]=='t')
{
splitting = SPLIT_TER_ONLY;
- printf("TER records only (ignoring chain id).\n");
+ printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
}
else
{
old_prev_chainnum = -1;
new_chainnum = -1;
+ this_atomname = NULL;
+ this_atomnum = -1;
+ this_resname = NULL;
+ this_resnum = -1;
+ this_chainid = '?';
+ this_chainnumber = -1;
+
for(i=0;i<pdba->nres;i++)
{
ri = &pdba->resinfo[i];
- old_this_chainid = ri->chainid;
- old_this_chainnum = ri->chainnum;
+ old_this_chainid = ri->chainid;
+ old_this_chainnum = ri->chainnum;
+
+ prev_atomname = this_atomname;
+ prev_atomnum = this_atomnum;
+ prev_resname = this_resname;
+ prev_resnum = this_resnum;
+ prev_chainid = this_chainid;
+ prev_chainnumber = this_chainnumber;
+
+ this_atomname = *(pdba->atomname[i]);
+ this_atomnum = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
+ this_resname = *ri->name;
+ this_resnum = ri->nr;
+ this_chainid = ri->chainid;
+ this_chainnumber = ri->chainnum;
switch (splitting)
{
new_chainnum++;
}
break;
+ case SPLIT_INTERACTIVE:
+ if(old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
+ {
+ if(i>0)
+ {
+ printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
+ "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
+ prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
+ this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
+
+ if(NULL==fgets(select,STRLEN-1,stdin))
+ {
+ gmx_fatal(FARGS,"Error reading from stdin");
+ }
+ }
+ if(i==0 || select[0] == 'y')
+ {
+ new_chainnum++;
+ }
+ }
+ break;
default:
gmx_fatal(FARGS,"Internal inconsistency - this shouldn't happen...");
break;
"The protonation state of N- and C-termini can be chosen interactively",
"with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
"respectively. Some force fields support zwitterionic forms for chains of",
- "one residue, but for polypeptides these options should NOT be selected.[PAR]",
-
+ "one residue, but for polypeptides these options should NOT be selected.",
+ "The AMBER force fields have unique forms for the terminal residues,",
+ "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
+ "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
+ "respectively to use these forms, making sure you preserve the format",
+ "of the coordinate file. Alternatively, use named terminating residues",
+ "(e.g. ACE, NME).[PAR]",
+
"The separation of chains is not entirely trivial since the markup",
"in user-generated PDB files frequently varies and sometimes it",
"is desirable to merge entries across a TER record, for instance",
"two protein chains or if you have a HEME group bound to a protein.",
"In such cases multiple chains should be contained in a single",
"[TT]moleculetype[tt] definition.",
- "To handle this, [TT]pdb2gmx[tt] has an option [TT]-chainsep[tt] so you can",
- "choose whether a new chain should start when we find a TER record,",
- "when the chain id changes, combinations of either or both of these",
- "or fully interactively.[PAR]",
-
+ "To handle this, [TT]pdb2gmx[tt] uses two separate options.",
+ "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
+ "start, and termini added when applicable. This can be done based on the",
+ "existence of TER records, when the chain id changes, or combinations of either",
+ "or both of these. You can also do the selection fully interactively.",
+ "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
+ "are merged into one moleculetype, after adding all the chemical termini (or not).",
+ "This can be turned off (no merging), all non-water chains can be merged into a",
+ "single molecule, or the selection can be done interactively.[PAR]",
+
"[TT]pdb2gmx[tt] will also check the occupancy field of the [TT].pdb[tt] file.",
"If any of the occupancies are not one, indicating that the atom is",
"not resolved well in the structure, a warning message is issued.",
int nssbonds;
t_ssbond *ssbonds;
rvec *pdbx,*x;
- gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bMerge;
+ gmx_bool bVsites=FALSE,bWat,bPrevWat=FALSE,bITP,bVsiteAromatics=FALSE,bCheckMerge;
real mHmult=0;
t_hackblock *hb_chain;
t_restp *restp_chain;
int this_chainstart;
int prev_chainstart;
gmx_bool bMerged;
+ int nchainmerges;
gmx_atomprop_t aps;
static const char *vsitestr[] = { NULL, "none", "hydrogens", "aromatics", NULL };
static const char *watstr[] = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
static const char *chainsep[] = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
+ static const char *merge[] = {NULL, "no", "all", "interactive", NULL };
static const char *ff = "select";
t_pargs pa[] = {
{ "-sb", FALSE, etREAL, {&short_bond_dist},
"HIDDENShort bond warning distance" },
{ "-chainsep", FALSE, etENUM, {chainsep},
- "Condition in PDB files when a new chain and molecule_type should be started" },
+ "Condition in PDB files when a new chain should be started (adding termini)" },
+ { "-merge", FALSE, etENUM, {&merge},
+ "Merge multiple chains into a single [moleculetype]" },
{ "-ff", FALSE, etSTR, {&ff},
"Force field, interactive by default. Use [TT]-h[tt] for information." },
{ "-water", FALSE, etENUM, {watstr},
{ "-ss", FALSE, etBOOL, {&bCysMan},
"Interactive SS bridge selection" },
{ "-ter", FALSE, etBOOL, {&bTerMan},
- "Interactive termini selection, iso charged" },
+ "Interactive termini selection, instead of charged (default)" },
{ "-lys", FALSE, etBOOL, {&bLysMan},
- "Interactive lysine selection, iso charged" },
+ "Interactive lysine selection, instead of charged" },
{ "-arg", FALSE, etBOOL, {&bArgMan},
- "Interactive arginine selection, iso charged" },
+ "Interactive arginine selection, instead of charged" },
{ "-asp", FALSE, etBOOL, {&bAspMan},
- "Interactive aspartic Acid selection, iso charged" },
+ "Interactive aspartic acid selection, instead of charged" },
{ "-glu", FALSE, etBOOL, {&bGluMan},
- "Interactive glutamic Acid selection, iso charged" },
+ "Interactive glutamic acid selection, instead of charged" },
{ "-gln", FALSE, etBOOL, {&bGlnMan},
- "Interactive glutamine selection, iso neutral" },
+ "Interactive glutamine selection, instead of neutral" },
{ "-his", FALSE, etBOOL, {&bHisMan},
- "Interactive histidine selection, iso checking H-bonds" },
+ "Interactive histidine selection, instead of checking H-bonds" },
{ "-angle", FALSE, etREAL, {&angle},
"Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
{ "-dist", FALSE, etREAL, {&distance},
modify_chain_numbers(&pdba_all,chainsep[0]);
-
- bMerge = !strncmp(chainsep[0],"int",3);
+ nchainmerges = 0;
this_atomname = NULL;
this_atomnum = -1;
prev_chainstart = 0;
pdb_ch=NULL;
+
bMerged = FALSE;
for (i=0; (i<natom); i++)
{
if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
{
this_chainstart = pdba_all.atom[i].resind;
- if (bMerge && i>0 && !bWat)
+
+ bMerged = FALSE;
+ if (i>0 && !bWat)
{
- printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) with\n"
- "chain starting with residue %s%d (chain id '%c', atom %d %s)? [n/y]\n",
- prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
- this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
-
- if(NULL==fgets(select,STRLEN-1,stdin))
+ if(!strncmp(merge[0],"int",3))
+ {
+ printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
+ "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
+ prev_resname,prev_resnum,prev_chainid,prev_atomnum,prev_atomname,
+ this_resname,this_resnum,this_chainid,this_atomnum,this_atomname);
+
+ if(NULL==fgets(select,STRLEN-1,stdin))
+ {
+ gmx_fatal(FARGS,"Error reading from stdin");
+ }
+ bMerged = (select[0] == 'y');
+ }
+ else if(!strncmp(merge[0],"all",3))
{
- gmx_fatal(FARGS,"Error reading from stdin");
+ bMerged = TRUE;
}
- }
- else
- {
- select[0] = 'n';
}
- bMerged = (select[0] == 'y');
- if (bMerged)
- {
+ if (bMerged)
+ {
pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
- pdba_all.atom[i].resind - prev_chainstart;
+ pdba_all.atom[i].resind - prev_chainstart;
pdb_ch[nch-1].nterpairs++;
srenew(pdb_ch[nch-1].chainstart,pdb_ch[nch-1].nterpairs+1);
+ nchainmerges++;
}
else
{
}
}
- if (bMerge)
- printf("\nMerged %d chains into one molecule definition\n\n",
- pdb_ch[0].nterpairs);
+ if (nchainmerges>0)
+ printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
+ nchainmerges);
printf("There are %d chains and %d blocks of water and "
"%d residues with %d atoms\n",