src/programs/gmx/pdb2gmx.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #ifdef HAVE_CONFIG_H
  38 #include <config.h>
  39 #endif
  40
  41 #include <time.h>
  42 #include <ctype.h>
  43 #include "sysstuff.h"
  44 #include "typedefs.h"
  45 #include "gromacs/fileio/gmxfio.h"
  46 #include "smalloc.h"
  47 #include "copyrite.h"
  48 #include "string2.h"
  49 #include "gromacs/fileio/confio.h"
  50 #include "symtab.h"
  51 #include "vec.h"
  52 #include "statutil.h"
  53 #include "gromacs/fileio/futil.h"
  54 #include "gmx_fatal.h"
  55 #include "gromacs/fileio/pdbio.h"
  56 #include "toputil.h"
  57 #include "h_db.h"
  58 #include "physics.h"
  59 #include "pgutil.h"
  60 #include "calch.h"
  61 #include "resall.h"
  62 #include "pdb2top.h"
  63 #include "ter_db.h"
  64 #include "strdb.h"
  65 #include "gbutil.h"
  66 #include "genhydro.h"
  67 #include "readinp.h"
  68 #include "atomprop.h"
  69 #include "index.h"
  70 #include "fflibutil.h"
  71 #include "macros.h"
  72
  73 #include "hizzie.h"
  74 #include "specbond.h"
  75 #include "xlate.h"
  76
  77 typedef struct {
  78     char gmx[6];
  79     char main[6];
  80     char nter[6];
  81     char cter[6];
  82     char bter[6];
  83 } rtprename_t;
  84
  85
  86 static const char *res2bb_notermini(const char *name,
  87                                     int nrr, const rtprename_t *rr)
  88 {
  89     /* NOTE: This function returns the main building block name,
  90      *       it does not take terminal renaming into account.
  91      */
  92     int i;
  93
  94     i = 0;
  95     while (i < nrr && gmx_strcasecmp(name, rr[i].gmx) != 0)
  96     {
  97         i++;
  98     }
  99
 100     return (i < nrr ? rr[i].main : name);
 101 }
 102
 103 static const char *select_res(int nr, int resnr,
 104                               const char *name[], const char *expl[],
 105                               const char *title,
 106                               int nrr, const rtprename_t *rr)
 107 {
 108     int sel = 0;
 109
 110     printf("Which %s type do you want for residue %d\n", title, resnr+1);
 111     for (sel = 0; (sel < nr); sel++)
 112     {
 113         printf("%d. %s (%s)\n",
 114                sel, expl[sel], res2bb_notermini(name[sel], nrr, rr));
 115     }
 116     printf("\nType a number:"); fflush(stdout);
 117
 118     if (scanf("%d", &sel) != 1)
 119     {
 120         gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
 121     }
 122
 123     return name[sel];
 124 }
 125
 126 static const char *get_asptp(int resnr, int nrr, const rtprename_t *rr)
 127 {
 128     enum {
 129         easp, easpH, easpNR
 130     };
 131     const char *lh[easpNR]   = { "ASP", "ASPH" };
 132     const char *expl[easpNR] = {
 133         "Not protonated (charge -1)",
 134         "Protonated (charge 0)"
 135     };
 136
 137     return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", nrr, rr);
 138 }
 139
 140 static const char *get_glutp(int resnr, int nrr, const rtprename_t *rr)
 141 {
 142     enum {
 143         eglu, egluH, egluNR
 144     };
 145     const char *lh[egluNR]   = { "GLU", "GLUH" };
 146     const char *expl[egluNR] = {
 147         "Not protonated (charge -1)",
 148         "Protonated (charge 0)"
 149     };
 150
 151     return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", nrr, rr);
 152 }
 153
 154 static const char *get_glntp(int resnr, int nrr, const rtprename_t *rr)
 155 {
 156     enum {
 157         egln, eglnH, eglnNR
 158     };
 159     const char *lh[eglnNR]   = { "GLN", "QLN" };
 160     const char *expl[eglnNR] = {
 161         "Not protonated (charge 0)",
 162         "Protonated (charge +1)"
 163     };
 164
 165     return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", nrr, rr);
 166 }
 167
 168 static const char *get_lystp(int resnr, int nrr, const rtprename_t *rr)
 169 {
 170     enum {
 171         elys, elysH, elysNR
 172     };
 173     const  char *lh[elysNR]   = { "LYSN", "LYS" };
 174     const char  *expl[elysNR] = {
 175         "Not protonated (charge 0)",
 176         "Protonated (charge +1)"
 177     };
 178
 179     return select_res(elysNR, resnr, lh, expl, "LYSINE", nrr, rr);
 180 }
 181
 182 static const char *get_argtp(int resnr, int nrr, const rtprename_t *rr)
 183 {
 184     enum {
 185         earg, eargH, eargNR
 186     };
 187     const  char *lh[eargNR]   = { "ARGN", "ARG" };
 188     const char  *expl[eargNR] = {
 189         "Not protonated (charge 0)",
 190         "Protonated (charge +1)"
 191     };
 192
 193     return select_res(eargNR, resnr, lh, expl, "ARGININE", nrr, rr);
 194 }
 195
 196 static const char *get_histp(int resnr, int nrr, const rtprename_t *rr)
 197 {
 198     const char *expl[ehisNR] = {
 199         "H on ND1 only",
 200         "H on NE2 only",
 201         "H on ND1 and NE2",
 202         "Coupled to Heme"
 203     };
 204
 205     return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", nrr, rr);
 206 }
 207
 208 static void read_rtprename(const char *fname, FILE *fp,
 209                            int *nrtprename, rtprename_t **rtprename)
 210 {
 211     char         line[STRLEN], buf[STRLEN];
 212     int          n;
 213     rtprename_t *rr;
 214     int          ncol, nc;
 215
 216     n  = *nrtprename;
 217     rr = *rtprename;
 218
 219     ncol = 0;
 220     while (get_a_line(fp, line, STRLEN))
 221     {
 222         srenew(rr, n+1);
 223         nc = sscanf(line, "%s %s %s %s %s %s",
 224                     rr[n].gmx, rr[n].main, rr[n].nter, rr[n].cter, rr[n].bter, buf);
 225         if (ncol == 0)
 226         {
 227             if (nc != 2 && nc != 5)
 228             {
 229                 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d, %d or %d", fname, ncol, 2, 5);
 230             }
 231             ncol = nc;
 232         }
 233         else if (nc != ncol)
 234         {
 235             gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
 236         }
 237
 238         if (nc == 2)
 239         {
 240             /* This file does not have special termini names, copy them from main */
 241             strcpy(rr[n].nter, rr[n].main);
 242             strcpy(rr[n].cter, rr[n].main);
 243             strcpy(rr[n].bter, rr[n].main);
 244         }
 245
 246         n++;
 247     }
 248
 249     *nrtprename = n;
 250     *rtprename  = rr;
 251 }
 252
 253 static char *search_resrename(int nrr, rtprename_t *rr,
 254                               const char *name,
 255                               gmx_bool bStart, gmx_bool bEnd,
 256                               gmx_bool bCompareFFRTPname)
 257 {
 258     char *nn;
 259     int   i;
 260
 261     nn = NULL;
 262
 263     i = 0;
 264     while (i < nrr && ((!bCompareFFRTPname && strcmp(name, rr[i].gmx)  != 0) ||
 265                        ( bCompareFFRTPname && strcmp(name, rr[i].main) != 0)))
 266     {
 267         i++;
 268     }
 269
 270     /* If found in the database, rename this residue's rtp building block,
 271      * otherwise keep the old name.
 272      */
 273     if (i < nrr)
 274     {
 275         if (bStart && bEnd)
 276         {
 277             nn = rr[i].bter;
 278         }
 279         else if (bStart)
 280         {
 281             nn = rr[i].nter;
 282         }
 283         else if (bEnd)
 284         {
 285             nn = rr[i].cter;
 286         }
 287         else
 288         {
 289             nn = rr[i].main;
 290         }
 291
 292         if (nn[0] == '-')
 293         {
 294             gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
 295         }
 296     }
 297
 298     return nn;
 299 }
 300
 301
 302 static void rename_resrtp(t_atoms *pdba, int nterpairs, int *r_start, int *r_end,
 303                           int nrr, rtprename_t *rr, t_symtab *symtab,
 304                           gmx_bool bVerbose)
 305 {
 306     int      r, i, j;
 307     gmx_bool bStart, bEnd;
 308     char    *nn;
 309     gmx_bool bFFRTPTERRNM;
 310
 311     bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == NULL);
 312
 313     for (r = 0; r < pdba->nres; r++)
 314     {
 315         bStart = FALSE;
 316         bEnd   = FALSE;
 317         for (j = 0; j < nterpairs; j++)
 318         {
 319             if (r == r_start[j])
 320             {
 321                 bStart = TRUE;
 322             }
 323         }
 324         for (j = 0; j < nterpairs; j++)
 325         {
 326             if (r == r_end[j])
 327             {
 328                 bEnd = TRUE;
 329             }
 330         }
 331
 332         nn = search_resrename(nrr, rr, *pdba->resinfo[r].rtp, bStart, bEnd, FALSE);
 333
 334         if (bFFRTPTERRNM && nn == NULL && (bStart || bEnd))
 335         {
 336             /* This is a terminal residue, but the residue name,
 337              * currently stored in .rtp, is not a standard residue name,
 338              * but probably a force field specific rtp name.
 339              * Check if we need to rename it because it is terminal.
 340              */
 341             nn = search_resrename(nrr, rr,
 342                                   *pdba->resinfo[r].rtp, bStart, bEnd, TRUE);
 343         }
 344
 345         if (nn != NULL && strcmp(*pdba->resinfo[r].rtp, nn) != 0)
 346         {
 347             if (bVerbose)
 348             {
 349                 printf("Changing rtp entry of residue %d %s to '%s'\n",
 350                        pdba->resinfo[r].nr, *pdba->resinfo[r].name, nn);
 351             }
 352             pdba->resinfo[r].rtp = put_symtab(symtab, nn);
 353         }
 354     }
 355 }
 356
 357 static void pdbres_to_gmxrtp(t_atoms *pdba)
 358 {
 359     int i;
 360
 361     for (i = 0; (i < pdba->nres); i++)
 362     {
 363         if (pdba->resinfo[i].rtp == NULL)
 364         {
 365             pdba->resinfo[i].rtp = pdba->resinfo[i].name;
 366         }
 367     }
 368 }
 369
 370 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
 371                           gmx_bool bFullCompare, t_symtab *symtab)
 372 {
 373     char *resnm;
 374     int   i;
 375
 376     for (i = 0; (i < pdba->nres); i++)
 377     {
 378         resnm = *pdba->resinfo[i].name;
 379         if ((bFullCompare && (gmx_strcasecmp(resnm, oldnm) == 0)) ||
 380             (!bFullCompare && strstr(resnm, oldnm) != NULL))
 381         {
 382             /* Rename the residue name (not the rtp name) */
 383             pdba->resinfo[i].name = put_symtab(symtab, newnm);
 384         }
 385     }
 386 }
 387
 388 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
 389                       gmx_bool bFullCompare, t_symtab *symtab)
 390 {
 391     char *bbnm;
 392     int   i;
 393
 394     for (i = 0; (i < pdba->nres); i++)
 395     {
 396         /* We have not set the rtp name yes, use the residue name */
 397         bbnm = *pdba->resinfo[i].name;
 398         if ((bFullCompare && (gmx_strcasecmp(bbnm, oldnm) == 0)) ||
 399             (!bFullCompare && strstr(bbnm, oldnm) != NULL))
 400         {
 401             /* Change the rtp builing block name */
 402             pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
 403         }
 404     }
 405 }
 406
 407 static void rename_bbint(t_atoms *pdba, const char *oldnm,
 408                          const char *gettp(int, int, const rtprename_t *),
 409                          gmx_bool bFullCompare,
 410                          t_symtab *symtab,
 411                          int nrr, const rtprename_t *rr)
 412 {
 413     int         i;
 414     const char *ptr;
 415     char       *bbnm;
 416
 417     for (i = 0; i < pdba->nres; i++)
 418     {
 419         /* We have not set the rtp name yes, use the residue name */
 420         bbnm = *pdba->resinfo[i].name;
 421         if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
 422             (!bFullCompare && strstr(bbnm, oldnm) != NULL))
 423         {
 424             ptr                  = gettp(i, nrr, rr);
 425             pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
 426         }
 427     }
 428 }
 429
 430 static void check_occupancy(t_atoms *atoms, const char *filename, gmx_bool bVerbose)
 431 {
 432     int i, ftp;
 433     int nzero   = 0;
 434     int nnotone = 0;
 435
 436     ftp = fn2ftp(filename);
 437     if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
 438     {
 439         fprintf(stderr, "No occupancies in %s\n", filename);
 440     }
 441     else
 442     {
 443         for (i = 0; (i < atoms->nr); i++)
 444         {
 445             if (atoms->pdbinfo[i].occup != 1)
 446             {
 447                 if (bVerbose)
 448                 {
 449                     fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
 450                             *atoms->resinfo[atoms->atom[i].resind].name,
 451                             atoms->resinfo[ atoms->atom[i].resind].nr,
 452                             *atoms->atomname[i],
 453                             atoms->pdbinfo[i].occup);
 454                 }
 455                 if (atoms->pdbinfo[i].occup == 0)
 456                 {
 457                     nzero++;
 458                 }
 459                 else
 460                 {
 461                     nnotone++;
 462                 }
 463             }
 464         }
 465         if (nzero == atoms->nr)
 466         {
 467             fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
 468         }
 469         else if ((nzero > 0) || (nnotone > 0))
 470         {
 471             fprintf(stderr,
 472                     "\n"
 473                     "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
 474                     "         occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
 475                     "\n",
 476                     nzero, nnotone, atoms->nr);
 477         }
 478         else
 479         {
 480             fprintf(stderr, "All occupancies are one\n");
 481         }
 482     }
 483 }
 484
 485 void write_posres(char *fn, t_atoms *pdba, real fc)
 486 {
 487     FILE *fp;
 488     int   i;
 489
 490     fp = gmx_fio_fopen(fn, "w");
 491     fprintf(fp,
 492             "; In this topology include file, you will find position restraint\n"
 493             "; entries for all the heavy atoms in your original pdb file.\n"
 494             "; This means that all the protons which were added by pdb2gmx are\n"
 495             "; not restrained.\n"
 496             "\n"
 497             "[ position_restraints ]\n"
 498             "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
 499             );
 500     for (i = 0; (i < pdba->nr); i++)
 501     {
 502         if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
 503         {
 504             fprintf(fp, "%6d%6d  %g  %g  %g\n", i+1, 1, fc, fc, fc);
 505         }
 506     }
 507     gmx_fio_fclose(fp);
 508 }
 509
 510 static int read_pdball(const char *inf, const char *outf, char *title,
 511                        t_atoms *atoms, rvec **x,
 512                        int *ePBC, matrix box, gmx_bool bRemoveH,
 513                        t_symtab *symtab, gmx_residuetype_t rt, const char *watres,
 514                        gmx_atomprop_t aps, gmx_bool bVerbose)
 515 /* Read a pdb file. (containing proteins) */
 516 {
 517     int  natom, new_natom, i;
 518
 519     /* READ IT */
 520     printf("Reading %s...\n", inf);
 521     get_stx_coordnum(inf, &natom);
 522     init_t_atoms(atoms, natom, TRUE);
 523     snew(*x, natom);
 524     read_stx_conf(inf, title, atoms, *x, NULL, ePBC, box);
 525     if (fn2ftp(inf) == efPDB)
 526     {
 527         get_pdb_atomnumber(atoms, aps);
 528     }
 529     if (bRemoveH)
 530     {
 531         new_natom = 0;
 532         for (i = 0; i < atoms->nr; i++)
 533         {
 534             if (!is_hydrogen(*atoms->atomname[i]))
 535             {
 536                 atoms->atom[new_natom]     = atoms->atom[i];
 537                 atoms->atomname[new_natom] = atoms->atomname[i];
 538                 atoms->pdbinfo[new_natom]  = atoms->pdbinfo[i];
 539                 copy_rvec((*x)[i], (*x)[new_natom]);
 540                 new_natom++;
 541             }
 542         }
 543         atoms->nr = new_natom;
 544         natom     = new_natom;
 545     }
 546
 547     printf("Read");
 548     if (title && title[0])
 549     {
 550         printf(" '%s',", title);
 551     }
 552     printf(" %d atoms\n", natom);
 553
 554     /* Rename residues */
 555     rename_pdbres(atoms, "HOH", watres, FALSE, symtab);
 556     rename_pdbres(atoms, "SOL", watres, FALSE, symtab);
 557     rename_pdbres(atoms, "WAT", watres, FALSE, symtab);
 558
 559     rename_atoms("xlateat.dat", NULL,
 560                  atoms, symtab, NULL, TRUE, rt, TRUE, bVerbose);
 561
 562     if (natom == 0)
 563     {
 564         return 0;
 565     }
 566
 567     if (outf)
 568     {
 569         write_sto_conf(outf, title, atoms, *x, NULL, *ePBC, box);
 570     }
 571
 572     return natom;
 573 }
 574
 575 void process_chain(t_atoms *pdba, rvec *x,
 576                    gmx_bool bTrpU, gmx_bool bPheU, gmx_bool bTyrU,
 577                    gmx_bool bLysMan, gmx_bool bAspMan, gmx_bool bGluMan,
 578                    gmx_bool bHisMan, gmx_bool bArgMan, gmx_bool bGlnMan,
 579                    real angle, real distance, t_symtab *symtab,
 580                    int nrr, const rtprename_t *rr)
 581 {
 582     /* Rename aromatics, lys, asp and histidine */
 583     if (bTyrU)
 584     {
 585         rename_bb(pdba, "TYR", "TYRU", FALSE, symtab);
 586     }
 587     if (bTrpU)
 588     {
 589         rename_bb(pdba, "TRP", "TRPU", FALSE, symtab);
 590     }
 591     if (bPheU)
 592     {
 593         rename_bb(pdba, "PHE", "PHEU", FALSE, symtab);
 594     }
 595     if (bLysMan)
 596     {
 597         rename_bbint(pdba, "LYS", get_lystp, FALSE, symtab, nrr, rr);
 598     }
 599     if (bArgMan)
 600     {
 601         rename_bbint(pdba, "ARG", get_argtp, FALSE, symtab, nrr, rr);
 602     }
 603     if (bGlnMan)
 604     {
 605         rename_bbint(pdba, "GLN", get_glntp, FALSE, symtab, nrr, rr);
 606     }
 607     if (bAspMan)
 608     {
 609         rename_bbint(pdba, "ASP", get_asptp, FALSE, symtab, nrr, rr);
 610     }
 611     else
 612     {
 613         rename_bb(pdba, "ASPH", "ASP", FALSE, symtab);
 614     }
 615     if (bGluMan)
 616     {
 617         rename_bbint(pdba, "GLU", get_glutp, FALSE, symtab, nrr, rr);
 618     }
 619     else
 620     {
 621         rename_bb(pdba, "GLUH", "GLU", FALSE, symtab);
 622     }
 623
 624     if (!bHisMan)
 625     {
 626         set_histp(pdba, x, angle, distance);
 627     }
 628     else
 629     {
 630         rename_bbint(pdba, "HIS", get_histp, TRUE, symtab, nrr, rr);
 631     }
 632
 633     /* Initialize the rtp builing block names with the residue names
 634      * for the residues that have not been processed above.
 635      */
 636     pdbres_to_gmxrtp(pdba);
 637
 638     /* Now we have all rtp names set.
 639      * The rtp names will conform to Gromacs naming,
 640      * unless the input pdb file contained one or more force field specific
 641      * rtp names as residue names.
 642      */
 643 }
 644
 645 /* struct for sorting the atoms from the pdb file */
 646 typedef struct {
 647     int  resnr;  /* residue number               */
 648     int  j;      /* database order index         */
 649     int  index;  /* original atom number         */
 650     char anm1;   /* second letter of atom name   */
 651     char altloc; /* alternate location indicator */
 652 } t_pdbindex;
 653
 654 int pdbicomp(const void *a, const void *b)
 655 {
 656     t_pdbindex *pa, *pb;
 657     int         d;
 658
 659     pa = (t_pdbindex *)a;
 660     pb = (t_pdbindex *)b;
 661
 662     d = (pa->resnr - pb->resnr);
 663     if (d == 0)
 664     {
 665         d = (pa->j - pb->j);
 666         if (d == 0)
 667         {
 668             d = (pa->anm1 - pb->anm1);
 669             if (d == 0)
 670             {
 671                 d = (pa->altloc - pb->altloc);
 672             }
 673         }
 674     }
 675
 676     return d;
 677 }
 678
 679 static void sort_pdbatoms(t_restp restp[],
 680                           int natoms, t_atoms **pdbaptr, rvec **x,
 681                           t_blocka *block, char ***gnames)
 682 {
 683     t_atoms     *pdba, *pdbnew;
 684     rvec       **xnew;
 685     int          i, j;
 686     t_restp     *rptr;
 687     t_hackblock *hbr;
 688     t_pdbindex  *pdbi;
 689     atom_id     *a;
 690     char        *atomnm;
 691
 692     pdba   = *pdbaptr;
 693     natoms = pdba->nr;
 694     pdbnew = NULL;
 695     snew(xnew, 1);
 696     snew(pdbi, natoms);
 697
 698     for (i = 0; i < natoms; i++)
 699     {
 700         atomnm = *pdba->atomname[i];
 701         rptr   = &restp[pdba->atom[i].resind];
 702         for (j = 0; (j < rptr->natom); j++)
 703         {
 704             if (gmx_strcasecmp(atomnm, *(rptr->atomname[j])) == 0)
 705             {
 706                 break;
 707             }
 708         }
 709         if (j == rptr->natom)
 710         {
 711             char buf[STRLEN];
 712
 713             sprintf(buf,
 714                     "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
 715                     "while sorting atoms.\n%s", atomnm,
 716                     *pdba->resinfo[pdba->atom[i].resind].name,
 717                     pdba->resinfo[pdba->atom[i].resind].nr,
 718                     rptr->resname,
 719                     rptr->natom,
 720                     is_hydrogen(atomnm) ?
 721                     "\nFor a hydrogen, this can be a different protonation state, or it\n"
 722                     "might have had a different number in the PDB file and was rebuilt\n"
 723                     "(it might for instance have been H3, and we only expected H1 & H2).\n"
 724                     "Note that hydrogens might have been added to the entry for the N-terminus.\n"
 725                     "Remove this hydrogen or choose a different protonation state to solve it.\n"
 726                     "Option -ignh will ignore all hydrogens in the input." : ".");
 727             gmx_fatal(FARGS, buf);
 728         }
 729         /* make shadow array to be sorted into indexgroup */
 730         pdbi[i].resnr  = pdba->atom[i].resind;
 731         pdbi[i].j      = j;
 732         pdbi[i].index  = i;
 733         pdbi[i].anm1   = atomnm[1];
 734         pdbi[i].altloc = pdba->pdbinfo[i].altloc;
 735     }
 736     qsort(pdbi, natoms, (size_t)sizeof(pdbi[0]), pdbicomp);
 737
 738     /* pdba is sorted in pdbnew using the pdbi index */
 739     snew(a, natoms);
 740     snew(pdbnew, 1);
 741     init_t_atoms(pdbnew, natoms, TRUE);
 742     snew(*xnew, natoms);
 743     pdbnew->nr   = pdba->nr;
 744     pdbnew->nres = pdba->nres;
 745     sfree(pdbnew->resinfo);
 746     pdbnew->resinfo = pdba->resinfo;
 747     for (i = 0; i < natoms; i++)
 748     {
 749         pdbnew->atom[i]     = pdba->atom[pdbi[i].index];
 750         pdbnew->atomname[i] = pdba->atomname[pdbi[i].index];
 751         pdbnew->pdbinfo[i]  = pdba->pdbinfo[pdbi[i].index];
 752         copy_rvec((*x)[pdbi[i].index], (*xnew)[i]);
 753         /* make indexgroup in block */
 754         a[i] = pdbi[i].index;
 755     }
 756     /* clean up */
 757     sfree(pdba->atomname);
 758     sfree(pdba->atom);
 759     sfree(pdba->pdbinfo);
 760     sfree(pdba);
 761     sfree(*x);
 762     /* copy the sorted pdbnew back to pdba */
 763     *pdbaptr = pdbnew;
 764     *x       = *xnew;
 765     add_grp(block, gnames, natoms, a, "prot_sort");
 766     sfree(xnew);
 767     sfree(a);
 768     sfree(pdbi);
 769 }
 770
 771 static int remove_duplicate_atoms(t_atoms *pdba, rvec x[], gmx_bool bVerbose)
 772 {
 773     int        i, j, oldnatoms, ndel;
 774     t_resinfo *ri;
 775
 776     printf("Checking for duplicate atoms....\n");
 777     oldnatoms    = pdba->nr;
 778     ndel         = 0;
 779     /* NOTE: pdba->nr is modified inside the loop */
 780     for (i = 1; (i < pdba->nr); i++)
 781     {
 782         /* compare 'i' and 'i-1', throw away 'i' if they are identical
 783            this is a 'while' because multiple alternate locations can be present */
 784         while ( (i < pdba->nr) &&
 785                 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
 786                 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
 787         {
 788             ndel++;
 789             if (bVerbose)
 790             {
 791                 ri = &pdba->resinfo[pdba->atom[i].resind];
 792                 printf("deleting duplicate atom %4s  %s%4d%c",
 793                        *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
 794                 if (ri->chainid && (ri->chainid != ' '))
 795                 {
 796                     printf(" ch %c", ri->chainid);
 797                 }
 798                 if (pdba->pdbinfo)
 799                 {
 800                     if (pdba->pdbinfo[i].atomnr)
 801                     {
 802                         printf("  pdb nr %4d", pdba->pdbinfo[i].atomnr);
 803                     }
 804                     if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
 805                     {
 806                         printf("  altloc %c", pdba->pdbinfo[i].altloc);
 807                     }
 808                 }
 809                 printf("\n");
 810             }
 811             pdba->nr--;
 812             /* We can not free, since it might be in the symtab */
 813             /* sfree(pdba->atomname[i]); */
 814             for (j = i; j < pdba->nr; j++)
 815             {
 816                 pdba->atom[j]     = pdba->atom[j+1];
 817                 pdba->atomname[j] = pdba->atomname[j+1];
 818                 pdba->pdbinfo[j]  = pdba->pdbinfo[j+1];
 819                 copy_rvec(x[j+1], x[j]);
 820             }
 821             srenew(pdba->atom,     pdba->nr);
 822             /* srenew(pdba->atomname, pdba->nr); */
 823             srenew(pdba->pdbinfo,  pdba->nr);
 824         }
 825     }
 826     if (pdba->nr != oldnatoms)
 827     {
 828         printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
 829     }
 830
 831     return pdba->nr;
 832 }
 833
 834 void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end, gmx_residuetype_t rt)
 835 {
 836     int         i;
 837     const char *p_startrestype;
 838     const char *p_restype;
 839     int         nstartwarn, nendwarn;
 840
 841     *r_start = -1;
 842     *r_end   = -1;
 843
 844     nstartwarn = 0;
 845     nendwarn   = 0;
 846
 847     /* Find the starting terminus (typially N or 5') */
 848     for (i = r0; i < r1 && *r_start == -1; i++)
 849     {
 850         gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_startrestype);
 851         if (!gmx_strcasecmp(p_startrestype, "Protein") || !gmx_strcasecmp(p_startrestype, "DNA") || !gmx_strcasecmp(p_startrestype, "RNA") )
 852         {
 853             printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 854             *r_start = i;
 855         }
 856         else
 857         {
 858             if (nstartwarn < 5)
 859             {
 860                 printf("Warning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 861             }
 862             if (nstartwarn == 5)
 863             {
 864                 printf("More than 5 unidentified residues at start of chain - disabling further warnings.\n");
 865             }
 866             nstartwarn++;
 867         }
 868     }
 869
 870     if (*r_start >= 0)
 871     {
 872         /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
 873         for (i = *r_start; i < r1; i++)
 874         {
 875             gmx_residuetype_get_type(rt, *pdba->resinfo[i].name, &p_restype);
 876             if (!gmx_strcasecmp(p_restype, p_startrestype) && nendwarn == 0)
 877             {
 878                 *r_end = i;
 879             }
 880             else
 881             {
 882                 if (nendwarn < 5)
 883                 {
 884                     printf("Warning: Residue %s%d in chain has different type (%s) from starting residue %s%d (%s).\n",
 885                            *pdba->resinfo[i].name, pdba->resinfo[i].nr, p_restype,
 886                            *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, p_startrestype);
 887                 }
 888                 if (nendwarn == 5)
 889                 {
 890                     printf("More than 5 unidentified residues at end of chain - disabling further warnings.\n");
 891                 }
 892                 nendwarn++;
 893             }
 894         }
 895     }
 896
 897     if (*r_end >= 0)
 898     {
 899         printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
 900     }
 901 }
 902
 903
 904 static void
 905 modify_chain_numbers(t_atoms *       pdba,
 906                      const char *    chainsep)
 907 {
 908     int           i;
 909     char          old_prev_chainid;
 910     char          old_this_chainid;
 911     int           old_prev_chainnum;
 912     int           old_this_chainnum;
 913     t_resinfo    *ri;
 914     char          select[STRLEN];
 915     int           new_chainnum;
 916     int           this_atomnum;
 917     int           prev_atomnum;
 918     const char *  prev_atomname;
 919     const char *  this_atomname;
 920     const char *  prev_resname;
 921     const char *  this_resname;
 922     int           prev_resnum;
 923     int           this_resnum;
 924     char          prev_chainid;
 925     char          this_chainid;
 926     int           prev_chainnumber;
 927     int           this_chainnumber;
 928
 929     enum
 930     {
 931         SPLIT_ID_OR_TER,
 932         SPLIT_ID_AND_TER,
 933         SPLIT_ID_ONLY,
 934         SPLIT_TER_ONLY,
 935         SPLIT_INTERACTIVE
 936     }
 937     splitting;
 938
 939     splitting = SPLIT_TER_ONLY; /* keep compiler happy */
 940
 941     /* Be a bit flexible to catch typos */
 942     if (!strncmp(chainsep, "id_o", 4))
 943     {
 944         /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
 945         splitting = SPLIT_ID_OR_TER;
 946         printf("Splitting chemical chains based on TER records or chain id changing.\n");
 947     }
 948     else if (!strncmp(chainsep, "int", 3))
 949     {
 950         /* For later interactive splitting we tentatively assign new chain numbers at either changing id or ter records */
 951         splitting = SPLIT_INTERACTIVE;
 952         printf("Splitting chemical chains interactively.\n");
 953     }
 954     else if (!strncmp(chainsep, "id_a", 4))
 955     {
 956         splitting = SPLIT_ID_AND_TER;
 957         printf("Splitting chemical chains based on TER records and chain id changing.\n");
 958     }
 959     else if (strlen(chainsep) == 2 && !strncmp(chainsep, "id", 4))
 960     {
 961         splitting = SPLIT_ID_ONLY;
 962         printf("Splitting chemical chains based on changing chain id only (ignoring TER records).\n");
 963     }
 964     else if (chainsep[0] == 't')
 965     {
 966         splitting = SPLIT_TER_ONLY;
 967         printf("Splitting chemical chains based on TER records only (ignoring chain id).\n");
 968     }
 969     else
 970     {
 971         gmx_fatal(FARGS, "Unidentified setting for chain separation: %s\n", chainsep);
 972     }
 973
 974     /* The default chain enumeration is based on TER records only, which is reflected in chainnum below */
 975
 976     old_prev_chainid  = '?';
 977     old_prev_chainnum = -1;
 978     new_chainnum      = -1;
 979
 980     this_atomname       = NULL;
 981     this_atomnum        = -1;
 982     this_resname        = NULL;
 983     this_resnum         = -1;
 984     this_chainid        = '?';
 985     this_chainnumber    = -1;
 986
 987     for (i = 0; i < pdba->nres; i++)
 988     {
 989         ri                 = &pdba->resinfo[i];
 990         old_this_chainid   = ri->chainid;
 991         old_this_chainnum  = ri->chainnum;
 992
 993         prev_atomname      = this_atomname;
 994         prev_atomnum       = this_atomnum;
 995         prev_resname       = this_resname;
 996         prev_resnum        = this_resnum;
 997         prev_chainid       = this_chainid;
 998         prev_chainnumber   = this_chainnumber;
 999
1000         this_atomname      = *(pdba->atomname[i]);
1001         this_atomnum       = (pdba->pdbinfo != NULL) ? pdba->pdbinfo[i].atomnr : i+1;
1002         this_resname       = *ri->name;
1003         this_resnum        = ri->nr;
1004         this_chainid       = ri->chainid;
1005         this_chainnumber   = ri->chainnum;
1006
1007         switch (splitting)
1008         {
1009             case SPLIT_ID_OR_TER:
1010                 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1011                 {
1012                     new_chainnum++;
1013                 }
1014                 break;
1015
1016             case SPLIT_ID_AND_TER:
1017                 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1018                 {
1019                     new_chainnum++;
1020                 }
1021                 break;
1022
1023             case SPLIT_ID_ONLY:
1024                 if (old_this_chainid != old_prev_chainid)
1025                 {
1026                     new_chainnum++;
1027                 }
1028                 break;
1029
1030             case SPLIT_TER_ONLY:
1031                 if (old_this_chainnum != old_prev_chainnum)
1032                 {
1033                     new_chainnum++;
1034                 }
1035                 break;
1036             case SPLIT_INTERACTIVE:
1037                 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1038                 {
1039                     if (i > 0)
1040                     {
1041                         printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\n"
1042                                "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1043                                prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1044                                this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1045
1046                         if (NULL == fgets(select, STRLEN-1, stdin))
1047                         {
1048                             gmx_fatal(FARGS, "Error reading from stdin");
1049                         }
1050                     }
1051                     if (i == 0 || select[0] == 'y')
1052                     {
1053                         new_chainnum++;
1054                     }
1055                 }
1056                 break;
1057             default:
1058                 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1059                 break;
1060         }
1061         old_prev_chainid  = old_this_chainid;
1062         old_prev_chainnum = old_this_chainnum;
1063
1064         ri->chainnum = new_chainnum;
1065     }
1066 }
1067
1068
1069 typedef struct {
1070     char     chainid;
1071     char     chainnum;
1072     int      start;
1073     int      natom;
1074     gmx_bool bAllWat;
1075     int      nterpairs;
1076     int     *chainstart;
1077 } t_pdbchain;
1078
1079 typedef struct {
1080     char          chainid;
1081     int           chainnum;
1082     gmx_bool      bAllWat;
1083     int           nterpairs;
1084     int          *chainstart;
1085     t_hackblock **ntdb;
1086     t_hackblock **ctdb;
1087     int          *r_start;
1088     int          *r_end;
1089     t_atoms      *pdba;
1090     rvec         *x;
1091 } t_chain;
1092
1093 int gmx_pdb2gmx(int argc, char *argv[])
1094 {
1095     const char *desc[] = {
1096         "[THISMODULE] reads a [TT].pdb[tt] (or [TT].gro[tt]) file, reads",
1097         "some database files, adds hydrogens to the molecules and generates",
1098         "coordinates in GROMACS (GROMOS), or optionally [TT].pdb[tt], format",
1099         "and a topology in GROMACS format.",
1100         "These files can subsequently be processed to generate a run input file.",
1101         "[PAR]",
1102         "[THISMODULE] will search for force fields by looking for",
1103         "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1104         "of the current working directory and of the GROMACS library directory",
1105         "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1106         "variable.",
1107         "By default the forcefield selection is interactive,",
1108         "but you can use the [TT]-ff[tt] option to specify one of the short names",
1109         "in the list on the command line instead. In that case [THISMODULE] just looks",
1110         "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1111         "[PAR]",
1112         "After choosing a force field, all files will be read only from",
1113         "the corresponding force field directory.",
1114         "If you want to modify or add a residue types, you can copy the force",
1115         "field directory from the GROMACS library directory to your current",
1116         "working directory. If you want to add new protein residue types,",
1117         "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1118         "or copy the whole library directory to a local directory and set",
1119         "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1120         "Check Chapter 5 of the manual for more information about file formats.",
1121         "[PAR]",
1122
1123         "Note that a [TT].pdb[tt] file is nothing more than a file format, and it",
1124         "need not necessarily contain a protein structure. Every kind of",
1125         "molecule for which there is support in the database can be converted.",
1126         "If there is no support in the database, you can add it yourself.[PAR]",
1127
1128         "The program has limited intelligence, it reads a number of database",
1129         "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1130         "if necessary this can be done manually. The program can prompt the",
1131         "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1132         "desired. For Lys the choice is between neutral (two protons on NZ) or",
1133         "protonated (three protons, default), for Asp and Glu unprotonated",
1134         "(default) or protonated, for His the proton can be either on ND1,",
1135         "on NE2 or on both. By default these selections are done automatically.",
1136         "For His, this is based on an optimal hydrogen bonding",
1137         "conformation. Hydrogen bonds are defined based on a simple geometric",
1138         "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1139         "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1140         "[TT]-dist[tt] respectively.[PAR]",
1141
1142         "The protonation state of N- and C-termini can be chosen interactively",
1143         "with the [TT]-ter[tt] flag.  Default termini are ionized (NH3+ and COO-),",
1144         "respectively.  Some force fields support zwitterionic forms for chains of",
1145         "one residue, but for polypeptides these options should NOT be selected.",
1146         "The AMBER force fields have unique forms for the terminal residues,",
1147         "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1148         "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1149         "respectively to use these forms, making sure you preserve the format",
1150         "of the coordinate file. Alternatively, use named terminating residues",
1151         "(e.g. ACE, NME).[PAR]",
1152
1153         "The separation of chains is not entirely trivial since the markup",
1154         "in user-generated PDB files frequently varies and sometimes it",
1155         "is desirable to merge entries across a TER record, for instance",
1156         "if you want a disulfide bridge or distance restraints between",
1157         "two protein chains or if you have a HEME group bound to a protein.",
1158         "In such cases multiple chains should be contained in a single",
1159         "[TT]moleculetype[tt] definition.",
1160         "To handle this, [THISMODULE] uses two separate options.",
1161         "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1162         "start, and termini added when applicable. This can be done based on the",
1163         "existence of TER records, when the chain id changes, or combinations of either",
1164         "or both of these. You can also do the selection fully interactively.",
1165         "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1166         "are merged into one moleculetype, after adding all the chemical termini (or not).",
1167         "This can be turned off (no merging), all non-water chains can be merged into a",
1168         "single molecule, or the selection can be done interactively.[PAR]",
1169
1170         "[THISMODULE] will also check the occupancy field of the [TT].pdb[tt] file.",
1171         "If any of the occupancies are not one, indicating that the atom is",
1172         "not resolved well in the structure, a warning message is issued.",
1173         "When a [TT].pdb[tt] file does not originate from an X-ray structure determination",
1174         "all occupancy fields may be zero. Either way, it is up to the user",
1175         "to verify the correctness of the input data (read the article!).[PAR]",
1176
1177         "During processing the atoms will be reordered according to GROMACS",
1178         "conventions. With [TT]-n[tt] an index file can be generated that",
1179         "contains one group reordered in the same way. This allows you to",
1180         "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1181         "one limitation: reordering is done after the hydrogens are stripped",
1182         "from the input and before new hydrogens are added. This means that",
1183         "you should not use [TT]-ignh[tt].[PAR]",
1184
1185         "The [TT].gro[tt] and [TT].g96[tt] file formats do not support chain",
1186         "identifiers. Therefore it is useful to enter a [TT].pdb[tt] file name at",
1187         "the [TT]-o[tt] option when you want to convert a multi-chain [TT].pdb[tt] file.",
1188         "[PAR]",
1189
1190         "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1191         "motions. Angular and out-of-plane motions can be removed by changing",
1192         "hydrogens into virtual sites and fixing angles, which fixes their",
1193         "position relative to neighboring atoms. Additionally, all atoms in the",
1194         "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1195         "can be converted into virtual sites, eliminating the fast improper dihedral",
1196         "fluctuations in these rings. [BB]Note[bb] that in this case all other hydrogen",
1197         "atoms are also converted to virtual sites. The mass of all atoms that are",
1198         "converted into virtual sites, is added to the heavy atoms.[PAR]",
1199         "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1200         "done by increasing the hydrogen-mass by a factor of 4. This is also",
1201         "done for water hydrogens to slow down the rotational motion of water.",
1202         "The increase in mass of the hydrogens is subtracted from the bonded",
1203         "(heavy) atom so that the total mass of the system remains the same."
1204     };
1205
1206
1207     FILE             *fp, *top_file, *top_file2, *itp_file = NULL;
1208     int               natom, nres;
1209     t_atoms           pdba_all, *pdba;
1210     t_atoms          *atoms;
1211     t_resinfo        *ri;
1212     t_blocka         *block;
1213     int               chain, nch, maxch, nwaterchain;
1214     t_pdbchain       *pdb_ch;
1215     t_chain          *chains, *cc;
1216     char              select[STRLEN];
1217     int               nincl, nmol;
1218     char            **incls;
1219     t_mols           *mols;
1220     char            **gnames;
1221     int               ePBC;
1222     matrix            box;
1223     rvec              box_space;
1224     int               i, j, k, l, nrtp;
1225     int              *swap_index, si;
1226     t_restp          *restp;
1227     t_hackblock      *ah;
1228     t_symtab          symtab;
1229     gpp_atomtype_t    atype;
1230     gmx_residuetype_t rt;
1231     const char       *top_fn;
1232     char              fn[256], itp_fn[STRLEN], posre_fn[STRLEN], buf_fn[STRLEN];
1233     char              molname[STRLEN], title[STRLEN], quote[STRLEN];
1234     char             *c, forcefield[STRLEN], ffdir[STRLEN];
1235     char              ffname[STRLEN], suffix[STRLEN], buf[STRLEN];
1236     char             *watermodel;
1237     const char       *watres;
1238     int               nrtpf;
1239     char            **rtpf;
1240     char              rtp[STRLEN];
1241     int               nrrn;
1242     char            **rrn;
1243     int               nrtprename, naa;
1244     rtprename_t      *rtprename = NULL;
1245     int               nah, nNtdb, nCtdb, ntdblist;
1246     t_hackblock      *ntdb, *ctdb, **tdblist;
1247     int               nssbonds;
1248     t_ssbond         *ssbonds;
1249     rvec             *pdbx, *x;
1250     gmx_bool          bVsites = FALSE, bWat, bPrevWat = FALSE, bITP, bVsiteAromatics = FALSE, bCheckMerge;
1251     real              mHmult  = 0;
1252     t_hackblock      *hb_chain;
1253     t_restp          *restp_chain;
1254     output_env_t      oenv;
1255     const char       *p_restype;
1256     int               rc;
1257     int               this_atomnum;
1258     int               prev_atomnum;
1259     const char     *  prev_atomname;
1260     const char     *  this_atomname;
1261     const char     *  prev_resname;
1262     const char     *  this_resname;
1263     int               prev_resnum;
1264     int               this_resnum;
1265     char              prev_chainid;
1266     char              this_chainid;
1267     int               prev_chainnumber;
1268     int               this_chainnumber;
1269     int               nid_used;
1270     int               this_chainstart;
1271     int               prev_chainstart;
1272     gmx_bool          bMerged;
1273     int               nchainmerges;
1274
1275     gmx_atomprop_t    aps;
1276
1277     t_filenm          fnm[] = {
1278         { efSTX, "-f", "eiwit.pdb", ffREAD  },
1279         { efSTO, "-o", "conf",      ffWRITE },
1280         { efTOP, NULL, NULL,        ffWRITE },
1281         { efITP, "-i", "posre",     ffWRITE },
1282         { efNDX, "-n", "clean",     ffOPTWR },
1283         { efSTO, "-q", "clean.pdb", ffOPTWR }
1284     };
1285 #define NFILE asize(fnm)
1286
1287
1288     /* Command line arguments must be static */
1289     static gmx_bool    bNewRTP        = FALSE;
1290     static gmx_bool    bInter         = FALSE, bCysMan = FALSE;
1291     static gmx_bool    bLysMan        = FALSE, bAspMan = FALSE, bGluMan = FALSE, bHisMan = FALSE;
1292     static gmx_bool    bGlnMan        = FALSE, bArgMan = FALSE;
1293     static gmx_bool    bTerMan        = FALSE, bUnA = FALSE, bHeavyH;
1294     static gmx_bool    bSort          = TRUE, bAllowMissing = FALSE, bRemoveH = FALSE;
1295     static gmx_bool    bDeuterate     = FALSE, bVerbose = FALSE, bChargeGroups = TRUE, bCmap = TRUE;
1296     static gmx_bool    bRenumRes      = FALSE, bRTPresname = FALSE;
1297     static real        angle          = 135.0, distance = 0.3, posre_fc = 1000;
1298     static real        long_bond_dist = 0.25, short_bond_dist = 0.05;
1299     static const char *vsitestr[]     = { NULL, "none", "hydrogens", "aromatics", NULL };
1300     static const char *watstr[]       = { NULL, "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", NULL };
1301     static const char *chainsep[]     = { NULL, "id_or_ter", "id_and_ter", "ter", "id", "interactive", NULL };
1302     static const char *merge[]        = {NULL, "no", "all", "interactive", NULL };
1303     static const char *ff             = "select";
1304
1305     t_pargs            pa[] = {
1306         { "-newrtp", FALSE, etBOOL, {&bNewRTP},
1307           "HIDDENWrite the residue database in new format to [TT]new.rtp[tt]"},
1308         { "-lb",     FALSE, etREAL, {&long_bond_dist},
1309           "HIDDENLong bond warning distance" },
1310         { "-sb",     FALSE, etREAL, {&short_bond_dist},
1311           "HIDDENShort bond warning distance" },
1312         { "-chainsep", FALSE, etENUM, {chainsep},
1313           "Condition in PDB files when a new chain should be started (adding termini)" },
1314         { "-merge",  FALSE, etENUM, {&merge},
1315           "Merge multiple chains into a single [moleculetype]" },
1316         { "-ff",     FALSE, etSTR,  {&ff},
1317           "Force field, interactive by default. Use [TT]-h[tt] for information." },
1318         { "-water",  FALSE, etENUM, {watstr},
1319           "Water model to use" },
1320         { "-inter",  FALSE, etBOOL, {&bInter},
1321           "Set the next 8 options to interactive"},
1322         { "-ss",     FALSE, etBOOL, {&bCysMan},
1323           "Interactive SS bridge selection" },
1324         { "-ter",    FALSE, etBOOL, {&bTerMan},
1325           "Interactive termini selection, instead of charged (default)" },
1326         { "-lys",    FALSE, etBOOL, {&bLysMan},
1327           "Interactive lysine selection, instead of charged" },
1328         { "-arg",    FALSE, etBOOL, {&bArgMan},
1329           "Interactive arginine selection, instead of charged" },
1330         { "-asp",    FALSE, etBOOL, {&bAspMan},
1331           "Interactive aspartic acid selection, instead of charged" },
1332         { "-glu",    FALSE, etBOOL, {&bGluMan},
1333           "Interactive glutamic acid selection, instead of charged" },
1334         { "-gln",    FALSE, etBOOL, {&bGlnMan},
1335           "Interactive glutamine selection, instead of neutral" },
1336         { "-his",    FALSE, etBOOL, {&bHisMan},
1337           "Interactive histidine selection, instead of checking H-bonds" },
1338         { "-angle",  FALSE, etREAL, {&angle},
1339           "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)" },
1340         { "-dist",   FALSE, etREAL, {&distance},
1341           "Maximum donor-acceptor distance for a H-bond (nm)" },
1342         { "-una",    FALSE, etBOOL, {&bUnA},
1343           "Select aromatic rings with united CH atoms on phenylalanine, "
1344           "tryptophane and tyrosine" },
1345         { "-sort",   FALSE, etBOOL, {&bSort},
1346           "HIDDENSort the residues according to database, turning this off is dangerous as charge groups might be broken in parts" },
1347         { "-ignh",   FALSE, etBOOL, {&bRemoveH},
1348           "Ignore hydrogen atoms that are in the coordinate file" },
1349         { "-missing", FALSE, etBOOL, {&bAllowMissing},
1350           "Continue when atoms are missing, dangerous" },
1351         { "-v",      FALSE, etBOOL, {&bVerbose},
1352           "Be slightly more verbose in messages" },
1353         { "-posrefc", FALSE, etREAL, {&posre_fc},
1354           "Force constant for position restraints" },
1355         { "-vsite",  FALSE, etENUM, {vsitestr},
1356           "Convert atoms to virtual sites" },
1357         { "-heavyh", FALSE, etBOOL, {&bHeavyH},
1358           "Make hydrogen atoms heavy" },
1359         { "-deuterate", FALSE, etBOOL, {&bDeuterate},
1360           "Change the mass of hydrogens to 2 amu" },
1361         { "-chargegrp", TRUE, etBOOL, {&bChargeGroups},
1362           "Use charge groups in the [TT].rtp[tt] file"  },
1363         { "-cmap", TRUE, etBOOL, {&bCmap},
1364           "Use cmap torsions (if enabled in the [TT].rtp[tt] file)"  },
1365         { "-renum", TRUE, etBOOL, {&bRenumRes},
1366           "Renumber the residues consecutively in the output"  },
1367         { "-rtpres", TRUE, etBOOL, {&bRTPresname},
1368           "Use [TT].rtp[tt] entry names as residue names"  }
1369     };
1370 #define NPARGS asize(pa)
1371
1372     if (!parse_common_args(&argc, argv, 0, NFILE, fnm, asize(pa), pa, asize(desc), desc,
1373                            0, NULL, &oenv))
1374     {
1375         return 0;
1376     }
1377
1378     /* Force field selection, interactive or direct */
1379     choose_ff(strcmp(ff, "select") == 0 ? NULL : ff,
1380               forcefield, sizeof(forcefield),
1381               ffdir, sizeof(ffdir));
1382
1383     if (strlen(forcefield) > 0)
1384     {
1385         strcpy(ffname, forcefield);
1386         ffname[0] = toupper(ffname[0]);
1387     }
1388     else
1389     {
1390         gmx_fatal(FARGS, "Empty forcefield string");
1391     }
1392
1393     printf("\nUsing the %s force field in directory %s\n\n",
1394            ffname, ffdir);
1395
1396     choose_watermodel(watstr[0], ffdir, &watermodel);
1397
1398     if (bInter)
1399     {
1400         /* if anything changes here, also change description of -inter */
1401         bCysMan = TRUE;
1402         bTerMan = TRUE;
1403         bLysMan = TRUE;
1404         bArgMan = TRUE;
1405         bAspMan = TRUE;
1406         bGluMan = TRUE;
1407         bGlnMan = TRUE;
1408         bHisMan = TRUE;
1409     }
1410
1411     if (bHeavyH)
1412     {
1413         mHmult = 4.0;
1414     }
1415     else if (bDeuterate)
1416     {
1417         mHmult = 2.0;
1418     }
1419     else
1420     {
1421         mHmult = 1.0;
1422     }
1423
1424     switch (vsitestr[0][0])
1425     {
1426         case 'n': /* none */
1427             bVsites         = FALSE;
1428             bVsiteAromatics = FALSE;
1429             break;
1430         case 'h': /* hydrogens */
1431             bVsites         = TRUE;
1432             bVsiteAromatics = FALSE;
1433             break;
1434         case 'a': /* aromatics */
1435             bVsites         = TRUE;
1436             bVsiteAromatics = TRUE;
1437             break;
1438         default:
1439             gmx_fatal(FARGS, "DEATH HORROR in $s (%d): vsitestr[0]='%s'",
1440                       __FILE__, __LINE__, vsitestr[0]);
1441     } /* end switch */
1442
1443     /* Open the symbol table */
1444     open_symtab(&symtab);
1445
1446     /* Residue type database */
1447     gmx_residuetype_init(&rt);
1448
1449     /* Read residue renaming database(s), if present */
1450     nrrn = fflib_search_file_end(ffdir, ".r2b", FALSE, &rrn);
1451
1452     nrtprename = 0;
1453     rtprename  = NULL;
1454     for (i = 0; i < nrrn; i++)
1455     {
1456         fp = fflib_open(rrn[i]);
1457         read_rtprename(rrn[i], fp, &nrtprename, &rtprename);
1458         ffclose(fp);
1459         sfree(rrn[i]);
1460     }
1461     sfree(rrn);
1462
1463     /* Add all alternative names from the residue renaming database to the list of recognized amino/nucleic acids. */
1464     naa = 0;
1465     for (i = 0; i < nrtprename; i++)
1466     {
1467         rc = gmx_residuetype_get_type(rt, rtprename[i].gmx, &p_restype);
1468
1469         /* Only add names if the 'standard' gromacs/iupac base name was found */
1470         if (rc == 0)
1471         {
1472             gmx_residuetype_add(rt, rtprename[i].main, p_restype);
1473             gmx_residuetype_add(rt, rtprename[i].nter, p_restype);
1474             gmx_residuetype_add(rt, rtprename[i].cter, p_restype);
1475             gmx_residuetype_add(rt, rtprename[i].bter, p_restype);
1476         }
1477     }
1478
1479     clear_mat(box);
1480     if (watermodel != NULL && (strstr(watermodel, "4p") ||
1481                                strstr(watermodel, "4P")))
1482     {
1483         watres = "HO4";
1484     }
1485     else if (watermodel != NULL && (strstr(watermodel, "5p") ||
1486                                     strstr(watermodel, "5P")))
1487     {
1488         watres = "HO5";
1489     }
1490     else
1491     {
1492         watres = "HOH";
1493     }
1494
1495     aps   = gmx_atomprop_init();
1496     natom = read_pdball(opt2fn("-f", NFILE, fnm), opt2fn_null("-q", NFILE, fnm), title,
1497                         &pdba_all, &pdbx, &ePBC, box, bRemoveH, &symtab, rt, watres,
1498                         aps, bVerbose);
1499
1500     if (natom == 0)
1501     {
1502         gmx_fatal(FARGS, "No atoms found in pdb file %s\n", opt2fn("-f", NFILE, fnm));
1503     }
1504
1505     printf("Analyzing pdb file\n");
1506     nch         = 0;
1507     maxch       = 0;
1508     nwaterchain = 0;
1509
1510     modify_chain_numbers(&pdba_all, chainsep[0]);
1511
1512     nchainmerges        = 0;
1513
1514     this_atomname       = NULL;
1515     this_atomnum        = -1;
1516     this_resname        = NULL;
1517     this_resnum         = -1;
1518     this_chainid        = '?';
1519     this_chainnumber    = -1;
1520     this_chainstart     = 0;
1521     /* Keep the compiler happy */
1522     prev_chainstart     = 0;
1523
1524     pdb_ch = NULL;
1525
1526     bMerged = FALSE;
1527     for (i = 0; (i < natom); i++)
1528     {
1529         ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1530
1531         prev_atomname      = this_atomname;
1532         prev_atomnum       = this_atomnum;
1533         prev_resname       = this_resname;
1534         prev_resnum        = this_resnum;
1535         prev_chainid       = this_chainid;
1536         prev_chainnumber   = this_chainnumber;
1537         if (!bMerged)
1538         {
1539             prev_chainstart    = this_chainstart;
1540         }
1541
1542         this_atomname      = *pdba_all.atomname[i];
1543         this_atomnum       = (pdba_all.pdbinfo != NULL) ? pdba_all.pdbinfo[i].atomnr : i+1;
1544         this_resname       = *ri->name;
1545         this_resnum        = ri->nr;
1546         this_chainid       = ri->chainid;
1547         this_chainnumber   = ri->chainnum;
1548
1549         bWat = gmx_strcasecmp(*ri->name, watres) == 0;
1550         if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat != bPrevWat))
1551         {
1552             this_chainstart = pdba_all.atom[i].resind;
1553
1554             bMerged = FALSE;
1555             if (i > 0 && !bWat)
1556             {
1557                 if (!strncmp(merge[0], "int", 3))
1558                 {
1559                     printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1560                            "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1561                            prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1562                            this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1563
1564                     if (NULL == fgets(select, STRLEN-1, stdin))
1565                     {
1566                         gmx_fatal(FARGS, "Error reading from stdin");
1567                     }
1568                     bMerged = (select[0] == 'y');
1569                 }
1570                 else if (!strncmp(merge[0], "all", 3))
1571                 {
1572                     bMerged = TRUE;
1573                 }
1574             }
1575
1576             if (bMerged)
1577             {
1578                 pdb_ch[nch-1].chainstart[pdb_ch[nch-1].nterpairs] =
1579                     pdba_all.atom[i].resind - prev_chainstart;
1580                 pdb_ch[nch-1].nterpairs++;
1581                 srenew(pdb_ch[nch-1].chainstart, pdb_ch[nch-1].nterpairs+1);
1582                 nchainmerges++;
1583             }
1584             else
1585             {
1586                 /* set natom for previous chain */
1587                 if (nch > 0)
1588                 {
1589                     pdb_ch[nch-1].natom = i-pdb_ch[nch-1].start;
1590                 }
1591                 if (bWat)
1592                 {
1593                     nwaterchain++;
1594                     ri->chainid = ' ';
1595                 }
1596                 /* check if chain identifier was used before */
1597                 for (j = 0; (j < nch); j++)
1598                 {
1599                     if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1600                     {
1601                         printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1602                                "They will be treated as separate chains unless you reorder your file.\n",
1603                                ri->chainid);
1604                     }
1605                 }
1606                 if (nch == maxch)
1607                 {
1608                     maxch += 16;
1609                     srenew(pdb_ch, maxch);
1610                 }
1611                 pdb_ch[nch].chainid  = ri->chainid;
1612                 pdb_ch[nch].chainnum = ri->chainnum;
1613                 pdb_ch[nch].start    = i;
1614                 pdb_ch[nch].bAllWat  = bWat;
1615                 if (bWat)
1616                 {
1617                     pdb_ch[nch].nterpairs = 0;
1618                 }
1619                 else
1620                 {
1621                     pdb_ch[nch].nterpairs = 1;
1622                 }
1623                 snew(pdb_ch[nch].chainstart, pdb_ch[nch].nterpairs+1);
1624                 /* modified [nch] to [0] below */
1625                 pdb_ch[nch].chainstart[0] = 0;
1626                 nch++;
1627             }
1628         }
1629         bPrevWat = bWat;
1630     }
1631     pdb_ch[nch-1].natom = natom-pdb_ch[nch-1].start;
1632
1633     /* set all the water blocks at the end of the chain */
1634     snew(swap_index, nch);
1635     j = 0;
1636     for (i = 0; i < nch; i++)
1637     {
1638         if (!pdb_ch[i].bAllWat)
1639         {
1640             swap_index[j] = i;
1641             j++;
1642         }
1643     }
1644     for (i = 0; i < nch; i++)
1645     {
1646         if (pdb_ch[i].bAllWat)
1647         {
1648             swap_index[j] = i;
1649             j++;
1650         }
1651     }
1652     if (nwaterchain > 1)
1653     {
1654         printf("Moved all the water blocks to the end\n");
1655     }
1656
1657     snew(chains, nch);
1658     /* copy pdb data and x for all chains */
1659     for (i = 0; (i < nch); i++)
1660     {
1661         si                   = swap_index[i];
1662         chains[i].chainid    = pdb_ch[si].chainid;
1663         chains[i].chainnum   = pdb_ch[si].chainnum;
1664         chains[i].bAllWat    = pdb_ch[si].bAllWat;
1665         chains[i].nterpairs  = pdb_ch[si].nterpairs;
1666         chains[i].chainstart = pdb_ch[si].chainstart;
1667         snew(chains[i].ntdb, pdb_ch[si].nterpairs);
1668         snew(chains[i].ctdb, pdb_ch[si].nterpairs);
1669         snew(chains[i].r_start, pdb_ch[si].nterpairs);
1670         snew(chains[i].r_end, pdb_ch[si].nterpairs);
1671
1672         snew(chains[i].pdba, 1);
1673         init_t_atoms(chains[i].pdba, pdb_ch[si].natom, TRUE);
1674         snew(chains[i].x, chains[i].pdba->nr);
1675         for (j = 0; j < chains[i].pdba->nr; j++)
1676         {
1677             chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1678             snew(chains[i].pdba->atomname[j], 1);
1679             *chains[i].pdba->atomname[j] =
1680                 strdup(*pdba_all.atomname[pdb_ch[si].start+j]);
1681             chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1682             copy_rvec(pdbx[pdb_ch[si].start+j], chains[i].x[j]);
1683         }
1684         /* Re-index the residues assuming that the indices are continuous */
1685         k                    = chains[i].pdba->atom[0].resind;
1686         nres                 = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1687         chains[i].pdba->nres = nres;
1688         for (j = 0; j < chains[i].pdba->nr; j++)
1689         {
1690             chains[i].pdba->atom[j].resind -= k;
1691         }
1692         srenew(chains[i].pdba->resinfo, nres);
1693         for (j = 0; j < nres; j++)
1694         {
1695             chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1696             snew(chains[i].pdba->resinfo[j].name, 1);
1697             *chains[i].pdba->resinfo[j].name = strdup(*pdba_all.resinfo[k+j].name);
1698             /* make all chain identifiers equal to that of the chain */
1699             chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1700         }
1701     }
1702
1703     if (nchainmerges > 0)
1704     {
1705         printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1706                nchainmerges);
1707     }
1708
1709     printf("There are %d chains and %d blocks of water and "
1710            "%d residues with %d atoms\n",
1711            nch-nwaterchain, nwaterchain,
1712            pdba_all.resinfo[pdba_all.atom[natom-1].resind].nr, natom);
1713
1714     printf("\n  %5s  %4s %6s\n", "chain", "#res", "#atoms");
1715     for (i = 0; (i < nch); i++)
1716     {
1717         printf("  %d '%c' %5d %6d  %s\n",
1718                i+1, chains[i].chainid ? chains[i].chainid : '-',
1719                chains[i].pdba->nres, chains[i].pdba->nr,
1720                chains[i].bAllWat ? "(only water)" : "");
1721     }
1722     printf("\n");
1723
1724     check_occupancy(&pdba_all, opt2fn("-f", NFILE, fnm), bVerbose);
1725
1726     /* Read atomtypes... */
1727     atype = read_atype(ffdir, &symtab);
1728
1729     /* read residue database */
1730     printf("Reading residue database... (%s)\n", forcefield);
1731     nrtpf = fflib_search_file_end(ffdir, ".rtp", TRUE, &rtpf);
1732     nrtp  = 0;
1733     restp = NULL;
1734     for (i = 0; i < nrtpf; i++)
1735     {
1736         read_resall(rtpf[i], &nrtp, &restp, atype, &symtab, FALSE);
1737         sfree(rtpf[i]);
1738     }
1739     sfree(rtpf);
1740     if (bNewRTP)
1741     {
1742         /* Not correct with multiple rtp input files with different bonded types */
1743         fp = gmx_fio_fopen("new.rtp", "w");
1744         print_resall(fp, nrtp, restp, atype);
1745         gmx_fio_fclose(fp);
1746     }
1747
1748     /* read hydrogen database */
1749     nah = read_h_db(ffdir, &ah);
1750
1751     /* Read Termini database... */
1752     nNtdb = read_ter_db(ffdir, 'n', &ntdb, atype);
1753     nCtdb = read_ter_db(ffdir, 'c', &ctdb, atype);
1754
1755     top_fn   = ftp2fn(efTOP, NFILE, fnm);
1756     top_file = gmx_fio_fopen(top_fn, "w");
1757
1758     print_top_header(top_file, top_fn, FALSE, ffdir, mHmult);
1759
1760     nincl = 0;
1761     nmol  = 0;
1762     incls = NULL;
1763     mols  = NULL;
1764     nres  = 0;
1765     for (chain = 0; (chain < nch); chain++)
1766     {
1767         cc = &(chains[chain]);
1768
1769         /* set pdba, natom and nres to the current chain */
1770         pdba  = cc->pdba;
1771         x     = cc->x;
1772         natom = cc->pdba->nr;
1773         nres  = cc->pdba->nres;
1774
1775         if (cc->chainid && ( cc->chainid != ' ' ) )
1776         {
1777             printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1778                    chain+1, cc->chainid, natom, nres);
1779         }
1780         else
1781         {
1782             printf("Processing chain %d (%d atoms, %d residues)\n",
1783                    chain+1, natom, nres);
1784         }
1785
1786         process_chain(pdba, x, bUnA, bUnA, bUnA, bLysMan, bAspMan, bGluMan,
1787                       bHisMan, bArgMan, bGlnMan, angle, distance, &symtab,
1788                       nrtprename, rtprename);
1789
1790         cc->chainstart[cc->nterpairs] = pdba->nres;
1791         j = 0;
1792         for (i = 0; i < cc->nterpairs; i++)
1793         {
1794             find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1795                         &(cc->r_start[j]), &(cc->r_end[j]), rt);
1796
1797             if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1798             {
1799                 j++;
1800             }
1801         }
1802         cc->nterpairs = j;
1803         if (cc->nterpairs == 0)
1804         {
1805             printf("Problem with chain definition, or missing terminal residues.\n"
1806                    "This chain does not appear to contain a recognized chain molecule.\n"
1807                    "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
1808         }
1809
1810         /* Check for disulfides and other special bonds */
1811         nssbonds = mk_specbonds(pdba, x, bCysMan, &ssbonds, bVerbose);
1812
1813         if (nrtprename > 0)
1814         {
1815             rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, nrtprename, rtprename,
1816                           &symtab, bVerbose);
1817         }
1818
1819         if (debug)
1820         {
1821             if (nch == 1)
1822             {
1823                 sprintf(fn, "chain.pdb");
1824             }
1825             else
1826             {
1827                 sprintf(fn, "chain_%c%d.pdb", cc->chainid, cc->chainnum);
1828             }
1829             write_sto_conf(fn, title, pdba, x, NULL, ePBC, box);
1830         }
1831
1832
1833         for (i = 0; i < cc->nterpairs; i++)
1834         {
1835
1836             /* Set termini.
1837              * We first apply a filter so we only have the
1838              * termini that can be applied to the residue in question
1839              * (or a generic terminus if no-residue specific is available).
1840              */
1841             /* First the N terminus */
1842             if (nNtdb > 0)
1843             {
1844                 tdblist = filter_ter(nrtp, restp, nNtdb, ntdb,
1845                                      *pdba->resinfo[cc->r_start[i]].name,
1846                                      *pdba->resinfo[cc->r_start[i]].rtp,
1847                                      &ntdblist);
1848                 if (ntdblist == 0)
1849                 {
1850                     printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
1851                            "is already in a terminus-specific form and skipping terminus selection.\n");
1852                     cc->ntdb[i] = NULL;
1853                 }
1854                 else
1855                 {
1856                     if (bTerMan && ntdblist > 1)
1857                     {
1858                         sprintf(select, "Select start terminus type for %s-%d",
1859                                 *pdba->resinfo[cc->r_start[i]].name,
1860                                 pdba->resinfo[cc->r_start[i]].nr);
1861                         cc->ntdb[i] = choose_ter(ntdblist, tdblist, select);
1862                     }
1863                     else
1864                     {
1865                         cc->ntdb[i] = tdblist[0];
1866                     }
1867
1868                     printf("Start terminus %s-%d: %s\n",
1869                            *pdba->resinfo[cc->r_start[i]].name,
1870                            pdba->resinfo[cc->r_start[i]].nr,
1871                            (cc->ntdb[i])->name);
1872                     sfree(tdblist);
1873                 }
1874             }
1875             else
1876             {
1877                 cc->ntdb[i] = NULL;
1878             }
1879
1880             /* And the C terminus */
1881             if (nCtdb > 0)
1882             {
1883                 tdblist = filter_ter(nrtp, restp, nCtdb, ctdb,
1884                                      *pdba->resinfo[cc->r_end[i]].name,
1885                                      *pdba->resinfo[cc->r_end[i]].rtp,
1886                                      &ntdblist);
1887                 if (ntdblist == 0)
1888                 {
1889                     printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
1890                            "is already in a terminus-specific form and skipping terminus selection.\n");
1891                     cc->ctdb[i] = NULL;
1892                 }
1893                 else
1894                 {
1895                     if (bTerMan && ntdblist > 1)
1896                     {
1897                         sprintf(select, "Select end terminus type for %s-%d",
1898                                 *pdba->resinfo[cc->r_end[i]].name,
1899                                 pdba->resinfo[cc->r_end[i]].nr);
1900                         cc->ctdb[i] = choose_ter(ntdblist, tdblist, select);
1901                     }
1902                     else
1903                     {
1904                         cc->ctdb[i] = tdblist[0];
1905                     }
1906                     printf("End terminus %s-%d: %s\n",
1907                            *pdba->resinfo[cc->r_end[i]].name,
1908                            pdba->resinfo[cc->r_end[i]].nr,
1909                            (cc->ctdb[i])->name);
1910                     sfree(tdblist);
1911                 }
1912             }
1913             else
1914             {
1915                 cc->ctdb[i] = NULL;
1916             }
1917         }
1918         /* lookup hackblocks and rtp for all residues */
1919         get_hackblocks_rtp(&hb_chain, &restp_chain,
1920                            nrtp, restp, pdba->nres, pdba->resinfo,
1921                            cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end);
1922         /* ideally, now we would not need the rtp itself anymore, but do
1923            everything using the hb and restp arrays. Unfortunately, that
1924            requires some re-thinking of code in gen_vsite.c, which I won't
1925            do now :( AF 26-7-99 */
1926
1927         rename_atoms(NULL, ffdir,
1928                      pdba, &symtab, restp_chain, FALSE, rt, FALSE, bVerbose);
1929
1930         match_atomnames_with_rtp(restp_chain, hb_chain, pdba, x, bVerbose);
1931
1932         if (bSort)
1933         {
1934             block = new_blocka();
1935             snew(gnames, 1);
1936             sort_pdbatoms(restp_chain, natom, &pdba, &x, block, &gnames);
1937             natom = remove_duplicate_atoms(pdba, x, bVerbose);
1938             if (ftp2bSet(efNDX, NFILE, fnm))
1939             {
1940                 if (bRemoveH)
1941                 {
1942                     fprintf(stderr, "WARNING: with the -remh option the generated "
1943                             "index file (%s) might be useless\n"
1944                             "(the index file is generated before hydrogens are added)",
1945                             ftp2fn(efNDX, NFILE, fnm));
1946                 }
1947                 write_index(ftp2fn(efNDX, NFILE, fnm), block, gnames);
1948             }
1949             for (i = 0; i < block->nr; i++)
1950             {
1951                 sfree(gnames[i]);
1952             }
1953             sfree(gnames);
1954             done_blocka(block);
1955         }
1956         else
1957         {
1958             fprintf(stderr, "WARNING: "
1959                     "without sorting no check for duplicate atoms can be done\n");
1960         }
1961
1962         /* Generate Hydrogen atoms (and termini) in the sequence */
1963         printf("Generating any missing hydrogen atoms and/or adding termini.\n");
1964         natom = add_h(&pdba, &x, nah, ah,
1965                       cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing,
1966                       NULL, NULL, TRUE, FALSE);
1967         printf("Now there are %d residues with %d atoms\n",
1968                pdba->nres, pdba->nr);
1969         if (debug)
1970         {
1971             write_pdbfile(debug, title, pdba, x, ePBC, box, ' ', 0, NULL, TRUE);
1972         }
1973
1974         if (debug)
1975         {
1976             for (i = 0; (i < natom); i++)
1977             {
1978                 fprintf(debug, "Res %s%d atom %d %s\n",
1979                         *(pdba->resinfo[pdba->atom[i].resind].name),
1980                         pdba->resinfo[pdba->atom[i].resind].nr, i+1, *pdba->atomname[i]);
1981             }
1982         }
1983
1984         strcpy(posre_fn, ftp2fn(efITP, NFILE, fnm));
1985
1986         /* make up molecule name(s) */
1987
1988         k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
1989
1990         gmx_residuetype_get_type(rt, *pdba->resinfo[k].name, &p_restype);
1991
1992         suffix[0] = '\0';
1993
1994         if (cc->bAllWat)
1995         {
1996             sprintf(molname, "Water");
1997         }
1998         else
1999         {
2000             this_chainid = cc->chainid;
2001
2002             /* Add the chain id if we have one */
2003             if (this_chainid != ' ')
2004             {
2005                 sprintf(buf, "_chain_%c", this_chainid);
2006                 strcat(suffix, buf);
2007             }
2008
2009             /* Check if there have been previous chains with the same id */
2010             nid_used = 0;
2011             for (k = 0; k < chain; k++)
2012             {
2013                 if (cc->chainid == chains[k].chainid)
2014                 {
2015                     nid_used++;
2016                 }
2017             }
2018             /* Add the number for this chain identifier if there are multiple copies */
2019             if (nid_used > 0)
2020             {
2021
2022                 sprintf(buf, "%d", nid_used+1);
2023                 strcat(suffix, buf);
2024             }
2025
2026             if (strlen(suffix) > 0)
2027             {
2028                 sprintf(molname, "%s%s", p_restype, suffix);
2029             }
2030             else
2031             {
2032                 strcpy(molname, p_restype);
2033             }
2034         }
2035
2036         if ((nch-nwaterchain > 1) && !cc->bAllWat)
2037         {
2038             bITP = TRUE;
2039             strcpy(itp_fn, top_fn);
2040             printf("Chain time...\n");
2041             c = strrchr(itp_fn, '.');
2042             sprintf(c, "_%s.itp", molname);
2043             c = strrchr(posre_fn, '.');
2044             sprintf(c, "_%s.itp", molname);
2045             if (strcmp(itp_fn, posre_fn) == 0)
2046             {
2047                 strcpy(buf_fn, posre_fn);
2048                 c  = strrchr(buf_fn, '.');
2049                 *c = '\0';
2050                 sprintf(posre_fn, "%s_pr.itp", buf_fn);
2051             }
2052
2053             nincl++;
2054             srenew(incls, nincl);
2055             incls[nincl-1] = strdup(itp_fn);
2056             itp_file       = gmx_fio_fopen(itp_fn, "w");
2057         }
2058         else
2059         {
2060             bITP = FALSE;
2061         }
2062
2063         srenew(mols, nmol+1);
2064         if (cc->bAllWat)
2065         {
2066             mols[nmol].name = strdup("SOL");
2067             mols[nmol].nr   = pdba->nres;
2068         }
2069         else
2070         {
2071             mols[nmol].name = strdup(molname);
2072             mols[nmol].nr   = 1;
2073         }
2074         nmol++;
2075
2076         if (bITP)
2077         {
2078             print_top_comment(itp_file, itp_fn, ffdir, TRUE);
2079         }
2080
2081         if (cc->bAllWat)
2082         {
2083             top_file2 = NULL;
2084         }
2085         else
2086         if (bITP)
2087         {
2088             top_file2 = itp_file;
2089         }
2090         else
2091         {
2092             top_file2 = top_file;
2093         }
2094
2095         pdb2top(top_file2, posre_fn, molname, pdba, &x, atype, &symtab,
2096                 nrtp, restp,
2097                 restp_chain, hb_chain,
2098                 bAllowMissing,
2099                 bVsites, bVsiteAromatics, ffdir,
2100                 mHmult, nssbonds, ssbonds,
2101                 long_bond_dist, short_bond_dist, bDeuterate, bChargeGroups, bCmap,
2102                 bRenumRes, bRTPresname);
2103
2104         if (!cc->bAllWat)
2105         {
2106             write_posres(posre_fn, pdba, posre_fc);
2107         }
2108
2109         if (bITP)
2110         {
2111             gmx_fio_fclose(itp_file);
2112         }
2113
2114         /* pdba and natom have been reassigned somewhere so: */
2115         cc->pdba = pdba;
2116         cc->x    = x;
2117
2118         if (debug)
2119         {
2120             if (cc->chainid == ' ')
2121             {
2122                 sprintf(fn, "chain.pdb");
2123             }
2124             else
2125             {
2126                 sprintf(fn, "chain_%c.pdb", cc->chainid);
2127             }
2128             cool_quote(quote, 255, NULL);
2129             write_sto_conf(fn, quote, pdba, x, NULL, ePBC, box);
2130         }
2131     }
2132
2133     if (watermodel == NULL)
2134     {
2135         for (chain = 0; chain < nch; chain++)
2136         {
2137             if (chains[chain].bAllWat)
2138             {
2139                 gmx_fatal(FARGS, "You have chosen not to include a water model, but there is water in the input file. Select a water model or remove the water from your input file.");
2140             }
2141         }
2142     }
2143     else
2144     {
2145         sprintf(buf_fn, "%s%c%s.itp", ffdir, DIR_SEPARATOR, watermodel);
2146         if (!fflib_fexist(buf_fn))
2147         {
2148             gmx_fatal(FARGS, "The topology file '%s' for the selected water model '%s' can not be found in the force field directory. Select a different water model.",
2149                       buf_fn, watermodel);
2150         }
2151     }
2152
2153     print_top_mols(top_file, title, ffdir, watermodel, nincl, incls, nmol, mols);
2154     gmx_fio_fclose(top_file);
2155
2156     gmx_residuetype_destroy(rt);
2157
2158     /* now merge all chains back together */
2159     natom = 0;
2160     nres  = 0;
2161     for (i = 0; (i < nch); i++)
2162     {
2163         natom += chains[i].pdba->nr;
2164         nres  += chains[i].pdba->nres;
2165     }
2166     snew(atoms, 1);
2167     init_t_atoms(atoms, natom, FALSE);
2168     for (i = 0; i < atoms->nres; i++)
2169     {
2170         sfree(atoms->resinfo[i].name);
2171     }
2172     sfree(atoms->resinfo);
2173     atoms->nres = nres;
2174     snew(atoms->resinfo, nres);
2175     snew(x, natom);
2176     k = 0;
2177     l = 0;
2178     for (i = 0; (i < nch); i++)
2179     {
2180         if (nch > 1)
2181         {
2182             printf("Including chain %d in system: %d atoms %d residues\n",
2183                    i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2184         }
2185         for (j = 0; (j < chains[i].pdba->nr); j++)
2186         {
2187             atoms->atom[k]         = chains[i].pdba->atom[j];
2188             atoms->atom[k].resind += l; /* l is processed nr of residues */
2189             atoms->atomname[k]     = chains[i].pdba->atomname[j];
2190             atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2191             copy_rvec(chains[i].x[j], x[k]);
2192             k++;
2193         }
2194         for (j = 0; (j < chains[i].pdba->nres); j++)
2195         {
2196             atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2197             if (bRTPresname)
2198             {
2199                 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2200             }
2201             l++;
2202         }
2203     }
2204
2205     if (nch > 1)
2206     {
2207         fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2208         print_sums(atoms, TRUE);
2209     }
2210
2211     fprintf(stderr, "\nWriting coordinate file...\n");
2212     clear_rvec(box_space);
2213     if (box[0][0] == 0)
2214     {
2215         gen_box(0, atoms->nr, x, box, box_space, FALSE);
2216     }
2217     write_sto_conf(ftp2fn(efSTO, NFILE, fnm), title, atoms, x, NULL, ePBC, box);
2218
2219     printf("\t\t--------- PLEASE NOTE ------------\n");
2220     printf("You have successfully generated a topology from: %s.\n",
2221            opt2fn("-f", NFILE, fnm));
2222     if (watermodel != NULL)
2223     {
2224         printf("The %s force field and the %s water model are used.\n",
2225                ffname, watermodel);
2226     }
2227     else
2228     {
2229         printf("The %s force field is used.\n",
2230                ffname);
2231     }
2232     printf("\t\t--------- ETON ESAELP ------------\n");
2233
2234     return 0;
2235 }