src/gromacs/gmxpreprocess/pdb2gmx.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "pdb2gmx.h"
  41
  42 #include <cctype>
  43 #include <cstdlib>
  44 #include <cstring>
  45 #include <ctime>
  46
  47 #include <algorithm>
  48 #include <memory>
  49 #include <string>
  50 #include <vector>
  51
  52 #include "gromacs/commandline/cmdlineoptionsmodule.h"
  53 #include "gromacs/fileio/confio.h"
  54 #include "gromacs/fileio/filetypes.h"
  55 #include "gromacs/fileio/gmxfio.h"
  56 #include "gromacs/fileio/pdbio.h"
  57 #include "gromacs/gmxlib/conformation_utilities.h"
  58 #include "gromacs/gmxpreprocess/fflibutil.h"
  59 #include "gromacs/gmxpreprocess/genhydro.h"
  60 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
  61 #include "gromacs/gmxpreprocess/grompp_impl.h"
  62 #include "gromacs/gmxpreprocess/h_db.h"
  63 #include "gromacs/gmxpreprocess/hizzie.h"
  64 #include "gromacs/gmxpreprocess/pdb2top.h"
  65 #include "gromacs/gmxpreprocess/pgutil.h"
  66 #include "gromacs/gmxpreprocess/specbond.h"
  67 #include "gromacs/gmxpreprocess/ter_db.h"
  68 #include "gromacs/gmxpreprocess/toputil.h"
  69 #include "gromacs/gmxpreprocess/xlate.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/options/basicoptions.h"
  72 #include "gromacs/options/filenameoption.h"
  73 #include "gromacs/options/ioptionscontainer.h"
  74 #include "gromacs/topology/atomprop.h"
  75 #include "gromacs/topology/block.h"
  76 #include "gromacs/topology/index.h"
  77 #include "gromacs/topology/residuetypes.h"
  78 #include "gromacs/topology/symtab.h"
  79 #include "gromacs/topology/topology.h"
  80 #include "gromacs/utility/dir_separator.h"
  81 #include "gromacs/utility/exceptions.h"
  82 #include "gromacs/utility/fatalerror.h"
  83 #include "gromacs/utility/filestream.h"
  84 #include "gromacs/utility/loggerbuilder.h"
  85 #include "gromacs/utility/path.h"
  86 #include "gromacs/utility/smalloc.h"
  87 #include "gromacs/utility/strdb.h"
  88 #include "gromacs/utility/stringutil.h"
  89
  90 #include "hackblock.h"
  91 #include "resall.h"
  92
  93 struct RtpRename
  94 {
  95     RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
  96         gmx(newGmx),
  97         main(newMain),
  98         nter(newNter),
  99         cter(newCter),
 100         bter(newBter)
 101     {
 102     }
 103     std::string gmx;
 104     std::string main;
 105     std::string nter;
 106     std::string cter;
 107     std::string bter;
 108 };
 109
 110 namespace
 111 {
 112
 113 const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
 114 {
 115     /* NOTE: This function returns the main building block name,
 116      *       it does not take terminal renaming into account.
 117      */
 118     auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
 119         return gmx::equalCaseInsensitive(name, rename.gmx);
 120     });
 121     return found != rr.end() ? found->main.c_str() : name.c_str();
 122 }
 123
 124 const char* select_res(int                            nr,
 125                        int                            resnr,
 126                        const char*                    name[],
 127                        const char*                    expl[],
 128                        const char*                    title,
 129                        gmx::ArrayRef<const RtpRename> rr)
 130 {
 131     printf("Which %s type do you want for residue %d\n", title, resnr + 1);
 132     for (int sel = 0; (sel < nr); sel++)
 133     {
 134         printf("%d. %s (%s)\n", sel, expl[sel], res2bb_notermini(name[sel], rr));
 135     }
 136     printf("\nType a number:");
 137     fflush(stdout);
 138
 139     int userSelection;
 140     if (scanf("%d", &userSelection) != 1)
 141     {
 142         gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
 143     }
 144
 145     return name[userSelection];
 146 }
 147
 148 const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 149 {
 150     enum
 151     {
 152         easp,
 153         easpH,
 154         easpNR
 155     };
 156     const char* lh[easpNR]   = { "ASP", "ASPH" };
 157     const char* expl[easpNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
 158
 159     return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", rr);
 160 }
 161
 162 const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 163 {
 164     enum
 165     {
 166         eglu,
 167         egluH,
 168         egluNR
 169     };
 170     const char* lh[egluNR]   = { "GLU", "GLUH" };
 171     const char* expl[egluNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
 172
 173     return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", rr);
 174 }
 175
 176 const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 177 {
 178     enum
 179     {
 180         egln,
 181         eglnH,
 182         eglnNR
 183     };
 184     const char* lh[eglnNR]   = { "GLN", "QLN" };
 185     const char* expl[eglnNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
 186
 187     return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", rr);
 188 }
 189
 190 const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 191 {
 192     enum
 193     {
 194         elys,
 195         elysH,
 196         elysNR
 197     };
 198     const char* lh[elysNR]   = { "LYSN", "LYS" };
 199     const char* expl[elysNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
 200
 201     return select_res(elysNR, resnr, lh, expl, "LYSINE", rr);
 202 }
 203
 204 const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 205 {
 206     enum
 207     {
 208         earg,
 209         eargH,
 210         eargNR
 211     };
 212     const char* lh[eargNR]   = { "ARGN", "ARG" };
 213     const char* expl[eargNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
 214
 215     return select_res(eargNR, resnr, lh, expl, "ARGININE", rr);
 216 }
 217
 218 const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 219 {
 220     const char* expl[ehisNR] = { "H on ND1 only", "H on NE2 only", "H on ND1 and NE2",
 221                                  "Coupled to Heme" };
 222
 223     return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", rr);
 224 }
 225
 226 void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
 227 {
 228     char line[STRLEN], buf[STRLEN];
 229
 230     int ncol = 0;
 231     while (get_a_line(fp, line, STRLEN))
 232     {
 233         /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
 234          * For other args, we read up to 6 chars (so we can detect if the length is > 5).
 235          * Note that the buffer length has been increased to 7 to allow this,
 236          * so we just need to make sure the strings have zero-length initially.
 237          */
 238         char gmx[STRLEN];
 239         char main[STRLEN];
 240         char nter[STRLEN];
 241         char cter[STRLEN];
 242         char bter[STRLEN];
 243         gmx[0]       = '\0';
 244         main[0]      = '\0';
 245         nter[0]      = '\0';
 246         cter[0]      = '\0';
 247         bter[0]      = '\0';
 248         int       nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
 249         RtpRename newEntry(gmx, main, nter, cter, bter);
 250         if (ncol == 0)
 251         {
 252             if (nc != 2 && nc != 5)
 253             {
 254                 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d or %d",
 255                           fname, ncol, 2, 5);
 256             }
 257             ncol = nc;
 258         }
 259         else if (nc != ncol)
 260         {
 261             gmx_fatal(FARGS,
 262                       "A line in residue renaming database '%s' has %d columns, while previous "
 263                       "lines have %d columns",
 264                       fname, nc, ncol);
 265         }
 266
 267         if (nc == 2)
 268         {
 269             /* This file does not have special termini names, copy them from main */
 270             newEntry.nter = newEntry.main;
 271             newEntry.cter = newEntry.main;
 272             newEntry.bter = newEntry.main;
 273         }
 274         rtprename->push_back(newEntry);
 275     }
 276 }
 277
 278 std::string search_resrename(gmx::ArrayRef<const RtpRename> rr, const char* name, bool bStart, bool bEnd, bool bCompareFFRTPname)
 279 {
 280     auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
 281         return ((!bCompareFFRTPname && (name == rename.gmx))
 282                 || (bCompareFFRTPname && (name == rename.main)));
 283     });
 284
 285     std::string newName;
 286     /* If found in the database, rename this residue's rtp building block,
 287      * otherwise keep the old name.
 288      */
 289     if (found != rr.end())
 290     {
 291         if (bStart && bEnd)
 292         {
 293             newName = found->bter;
 294         }
 295         else if (bStart)
 296         {
 297             newName = found->nter;
 298         }
 299         else if (bEnd)
 300         {
 301             newName = found->cter;
 302         }
 303         else
 304         {
 305             newName = found->main;
 306         }
 307
 308         if (newName[0] == '-')
 309         {
 310             gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name,
 311                       bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
 312                              : (bEnd ? " as an ending terminus" : ""));
 313         }
 314     }
 315
 316     return newName;
 317 }
 318
 319 void rename_resrtp(t_atoms*                       pdba,
 320                    int                            nterpairs,
 321                    gmx::ArrayRef<const int>       r_start,
 322                    gmx::ArrayRef<const int>       r_end,
 323                    gmx::ArrayRef<const RtpRename> rr,
 324                    t_symtab*                      symtab,
 325                    bool                           bVerbose,
 326                    const gmx::MDLogger&           logger)
 327 {
 328     bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
 329
 330     for (int r = 0; r < pdba->nres; r++)
 331     {
 332         bool bStart = false;
 333         bool bEnd   = false;
 334         for (int j = 0; j < nterpairs; j++)
 335         {
 336             if (r == r_start[j])
 337             {
 338                 bStart = true;
 339             }
 340         }
 341         for (int j = 0; j < nterpairs; j++)
 342         {
 343             if (r == r_end[j])
 344             {
 345                 bEnd = true;
 346             }
 347         }
 348
 349         std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
 350
 351         if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
 352         {
 353             /* This is a terminal residue, but the residue name,
 354              * currently stored in .rtp, is not a standard residue name,
 355              * but probably a force field specific rtp name.
 356              * Check if we need to rename it because it is terminal.
 357              */
 358             newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
 359         }
 360
 361         if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
 362         {
 363             if (bVerbose)
 364             {
 365                 GMX_LOG(logger.info)
 366                         .asParagraph()
 367                         .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
 368                                              pdba->resinfo[r].nr, *pdba->resinfo[r].name,
 369                                              newName.c_str());
 370             }
 371             pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
 372         }
 373     }
 374 }
 375
 376 void pdbres_to_gmxrtp(t_atoms* pdba)
 377 {
 378     int i;
 379
 380     for (i = 0; (i < pdba->nres); i++)
 381     {
 382         if (pdba->resinfo[i].rtp == nullptr)
 383         {
 384             pdba->resinfo[i].rtp = pdba->resinfo[i].name;
 385         }
 386     }
 387 }
 388
 389 void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
 390 {
 391     char* resnm;
 392     int   i;
 393
 394     for (i = 0; (i < pdba->nres); i++)
 395     {
 396         resnm = *pdba->resinfo[i].name;
 397         if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
 398             || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
 399         {
 400             /* Rename the residue name (not the rtp name) */
 401             pdba->resinfo[i].name = put_symtab(symtab, newnm);
 402         }
 403     }
 404 }
 405
 406 void rename_bb(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
 407 {
 408     char* bbnm;
 409     int   i;
 410
 411     for (i = 0; (i < pdba->nres); i++)
 412     {
 413         /* We have not set the rtp name yes, use the residue name */
 414         bbnm = *pdba->resinfo[i].name;
 415         if ((bFullCompare && (gmx::equalCaseInsensitive(bbnm, oldnm)))
 416             || (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
 417         {
 418             /* Change the rtp builing block name */
 419             pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
 420         }
 421     }
 422 }
 423
 424 void rename_bbint(t_atoms*                       pdba,
 425                   const char*                    oldnm,
 426                   const char*                    gettp(int, gmx::ArrayRef<const RtpRename>),
 427                   bool                           bFullCompare,
 428                   t_symtab*                      symtab,
 429                   gmx::ArrayRef<const RtpRename> rr)
 430 {
 431     int         i;
 432     const char* ptr;
 433     char*       bbnm;
 434
 435     for (i = 0; i < pdba->nres; i++)
 436     {
 437         /* We have not set the rtp name yet, use the residue name */
 438         bbnm = *pdba->resinfo[i].name;
 439         if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) || (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
 440         {
 441             ptr                  = gettp(i, rr);
 442             pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
 443         }
 444     }
 445 }
 446
 447 void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
 448 {
 449     int i, ftp;
 450     int nzero   = 0;
 451     int nnotone = 0;
 452
 453     ftp = fn2ftp(filename);
 454     if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
 455     {
 456         GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
 457     }
 458     else
 459     {
 460         for (i = 0; (i < atoms->nr); i++)
 461         {
 462             if (atoms->pdbinfo[i].occup != 1)
 463             {
 464                 if (bVerbose)
 465                 {
 466                     GMX_LOG(logger.warning)
 467                             .asParagraph()
 468                             .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
 469                                                  *atoms->resinfo[atoms->atom[i].resind].name,
 470                                                  atoms->resinfo[atoms->atom[i].resind].nr,
 471                                                  *atoms->atomname[i], atoms->pdbinfo[i].occup);
 472                 }
 473                 if (atoms->pdbinfo[i].occup == 0)
 474                 {
 475                     nzero++;
 476                 }
 477                 else
 478                 {
 479                     nnotone++;
 480                 }
 481             }
 482         }
 483         if (nzero == atoms->nr)
 484         {
 485             GMX_LOG(logger.warning)
 486                     .asParagraph()
 487                     .appendTextFormatted(
 488                             "All occupancy fields zero. This is probably not an X-Ray structure");
 489         }
 490         else if ((nzero > 0) || (nnotone > 0))
 491         {
 492             GMX_LOG(logger.warning)
 493                     .asParagraph()
 494                     .appendTextFormatted(
 495                             "there were %d atoms with zero occupancy and %d atoms with "
 496                             "         occupancy unequal to one (out of %d atoms). Check your pdb "
 497                             "file.",
 498                             nzero, nnotone, atoms->nr);
 499         }
 500         else
 501         {
 502             GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
 503         }
 504     }
 505 }
 506
 507 void write_posres(const char* fn, t_atoms* pdba, real fc)
 508 {
 509     FILE* fp;
 510     int   i;
 511
 512     fp = gmx_fio_fopen(fn, "w");
 513     fprintf(fp,
 514             "; In this topology include file, you will find position restraint\n"
 515             "; entries for all the heavy atoms in your original pdb file.\n"
 516             "; This means that all the protons which were added by pdb2gmx are\n"
 517             "; not restrained.\n"
 518             "\n"
 519             "[ position_restraints ]\n"
 520             "; %4s%6s%8s%8s%8s\n",
 521             "atom", "type", "fx", "fy", "fz");
 522     for (i = 0; (i < pdba->nr); i++)
 523     {
 524         if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
 525         {
 526             fprintf(fp, "%6d%6d  %g  %g  %g\n", i + 1, 1, fc, fc, fc);
 527         }
 528     }
 529     gmx_fio_fclose(fp);
 530 }
 531
 532 int read_pdball(const char*     inf,
 533                 bool            bOutput,
 534                 const char*     outf,
 535                 char**          title,
 536                 t_atoms*        atoms,
 537                 rvec**          x,
 538                 PbcType*        pbcType,
 539                 matrix          box,
 540                 bool            bRemoveH,
 541                 t_symtab*       symtab,
 542                 ResidueType*    rt,
 543                 const char*     watres,
 544                 AtomProperties* aps,
 545                 bool            bVerbose)
 546 /* Read a pdb file. (containing proteins) */
 547 {
 548     int natom, new_natom, i;
 549
 550     /* READ IT */
 551     printf("Reading %s...\n", inf);
 552     readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
 553     natom = atoms->nr;
 554     if (atoms->pdbinfo == nullptr)
 555     {
 556         snew(atoms->pdbinfo, atoms->nr);
 557     }
 558     if (fn2ftp(inf) == efPDB)
 559     {
 560         get_pdb_atomnumber(atoms, aps);
 561     }
 562     if (bRemoveH)
 563     {
 564         new_natom = 0;
 565         for (i = 0; i < atoms->nr; i++)
 566         {
 567             if (!is_hydrogen(*atoms->atomname[i]))
 568             {
 569                 atoms->atom[new_natom]     = atoms->atom[i];
 570                 atoms->atomname[new_natom] = atoms->atomname[i];
 571                 atoms->pdbinfo[new_natom]  = atoms->pdbinfo[i];
 572                 copy_rvec((*x)[i], (*x)[new_natom]);
 573                 new_natom++;
 574             }
 575         }
 576         atoms->nr = new_natom;
 577         natom     = new_natom;
 578     }
 579
 580     printf("Read");
 581     if (title[0])
 582     {
 583         printf(" '%s',", *title);
 584     }
 585     printf(" %d atoms\n", natom);
 586
 587     /* Rename residues */
 588     rename_pdbres(atoms, "HOH", watres, false, symtab);
 589     rename_pdbres(atoms, "SOL", watres, false, symtab);
 590     rename_pdbres(atoms, "WAT", watres, false, symtab);
 591
 592     rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose);
 593
 594     if (natom == 0)
 595     {
 596         return 0;
 597     }
 598     if (bOutput)
 599     {
 600         write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
 601     }
 602
 603     return natom;
 604 }
 605
 606 void process_chain(t_atoms*                       pdba,
 607                    gmx::ArrayRef<gmx::RVec>       x,
 608                    bool                           bTrpU,
 609                    bool                           bPheU,
 610                    bool                           bTyrU,
 611                    bool                           bLysMan,
 612                    bool                           bAspMan,
 613                    bool                           bGluMan,
 614                    bool                           bHisMan,
 615                    bool                           bArgMan,
 616                    bool                           bGlnMan,
 617                    real                           angle,
 618                    real                           distance,
 619                    t_symtab*                      symtab,
 620                    gmx::ArrayRef<const RtpRename> rr)
 621 {
 622     /* Rename aromatics, lys, asp and histidine */
 623     if (bTyrU)
 624     {
 625         rename_bb(pdba, "TYR", "TYRU", false, symtab);
 626     }
 627     if (bTrpU)
 628     {
 629         rename_bb(pdba, "TRP", "TRPU", false, symtab);
 630     }
 631     if (bPheU)
 632     {
 633         rename_bb(pdba, "PHE", "PHEU", false, symtab);
 634     }
 635     if (bLysMan)
 636     {
 637         rename_bbint(pdba, "LYS", get_lystp, false, symtab, rr);
 638     }
 639     if (bArgMan)
 640     {
 641         rename_bbint(pdba, "ARG", get_argtp, false, symtab, rr);
 642     }
 643     if (bGlnMan)
 644     {
 645         rename_bbint(pdba, "GLN", get_glntp, false, symtab, rr);
 646     }
 647     if (bAspMan)
 648     {
 649         rename_bbint(pdba, "ASP", get_asptp, false, symtab, rr);
 650     }
 651     else
 652     {
 653         rename_bb(pdba, "ASPH", "ASP", false, symtab);
 654     }
 655     if (bGluMan)
 656     {
 657         rename_bbint(pdba, "GLU", get_glutp, false, symtab, rr);
 658     }
 659     else
 660     {
 661         rename_bb(pdba, "GLUH", "GLU", false, symtab);
 662     }
 663
 664     if (!bHisMan)
 665     {
 666         set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
 667     }
 668     else
 669     {
 670         rename_bbint(pdba, "HIS", get_histp, true, symtab, rr);
 671     }
 672
 673     /* Initialize the rtp builing block names with the residue names
 674      * for the residues that have not been processed above.
 675      */
 676     pdbres_to_gmxrtp(pdba);
 677
 678     /* Now we have all rtp names set.
 679      * The rtp names will conform to Gromacs naming,
 680      * unless the input pdb file contained one or more force field specific
 681      * rtp names as residue names.
 682      */
 683 }
 684
 685 /* struct for sorting the atoms from the pdb file */
 686 typedef struct
 687 {
 688     int  resnr;  /* residue number               */
 689     int  j;      /* database order index         */
 690     int  index;  /* original atom number         */
 691     char anm1;   /* second letter of atom name   */
 692     char altloc; /* alternate location indicator */
 693 } t_pdbindex;
 694
 695 bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
 696 {
 697     int d = (a.resnr - b.resnr);
 698     if (d == 0)
 699     {
 700         d = (a.j - b.j);
 701         if (d == 0)
 702         {
 703             d = (a.anm1 - b.anm1);
 704             if (d == 0)
 705             {
 706                 d = (a.altloc - b.altloc);
 707             }
 708         }
 709     }
 710     return d < 0;
 711 }
 712
 713 void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
 714                    int                                    natoms,
 715                    t_atoms**                              pdbaptr,
 716                    t_atoms**                              newPdbAtoms,
 717                    std::vector<gmx::RVec>*                x,
 718                    t_blocka*                              block,
 719                    char***                                gnames)
 720 {
 721     t_atoms*               pdba = *pdbaptr;
 722     std::vector<gmx::RVec> xnew;
 723     t_pdbindex*            pdbi;
 724     char*                  atomnm;
 725
 726     natoms = pdba->nr;
 727     snew(pdbi, natoms);
 728
 729     for (int i = 0; i < natoms; i++)
 730     {
 731         atomnm                                  = *pdba->atomname[i];
 732         const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
 733         auto                     found =
 734                 std::find_if(localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
 735                              [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
 736         if (found == localPpResidue->atomname.end())
 737         {
 738             std::string buf = gmx::formatString(
 739                     "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
 740                     "while sorting atoms.\n%s",
 741                     atomnm, *pdba->resinfo[pdba->atom[i].resind].name,
 742                     pdba->resinfo[pdba->atom[i].resind].nr, localPpResidue->resname.c_str(),
 743                     localPpResidue->natom(),
 744                     is_hydrogen(atomnm)
 745                             ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
 746                               "might have had a different number in the PDB file and was rebuilt\n"
 747                               "(it might for instance have been H3, and we only expected H1 & "
 748                               "H2).\n"
 749                               "Note that hydrogens might have been added to the entry for the "
 750                               "N-terminus.\n"
 751                               "Remove this hydrogen or choose a different protonation state to "
 752                               "solve it.\n"
 753                               "Option -ignh will ignore all hydrogens in the input."
 754                             : ".");
 755             gmx_fatal(FARGS, "%s", buf.c_str());
 756         }
 757         /* make shadow array to be sorted into indexgroup */
 758         pdbi[i].resnr  = pdba->atom[i].resind;
 759         pdbi[i].j      = std::distance(localPpResidue->atomname.begin(), found);
 760         pdbi[i].index  = i;
 761         pdbi[i].anm1   = atomnm[1];
 762         pdbi[i].altloc = pdba->pdbinfo[i].altloc;
 763     }
 764     std::sort(pdbi, pdbi + natoms, pdbicomp);
 765
 766     /* pdba is sorted in pdbnew using the pdbi index */
 767     std::vector<int> a(natoms);
 768     srenew(*newPdbAtoms, 1);
 769     init_t_atoms((*newPdbAtoms), natoms, true);
 770     (*newPdbAtoms)->nr   = pdba->nr;
 771     (*newPdbAtoms)->nres = pdba->nres;
 772     srenew((*newPdbAtoms)->resinfo, pdba->nres);
 773     std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
 774     for (int i = 0; i < natoms; i++)
 775     {
 776         (*newPdbAtoms)->atom[i]     = pdba->atom[pdbi[i].index];
 777         (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
 778         (*newPdbAtoms)->pdbinfo[i]  = pdba->pdbinfo[pdbi[i].index];
 779         xnew.push_back(x->at(pdbi[i].index));
 780         /* make indexgroup in block */
 781         a[i] = pdbi[i].index;
 782     }
 783     /* clean up */
 784     done_atom(pdba);
 785     sfree(pdba);
 786     /* copy the sorted pdbnew back to pdba */
 787     *pdbaptr = *newPdbAtoms;
 788     *x       = xnew;
 789     add_grp(block, gnames, a, "prot_sort");
 790     sfree(pdbi);
 791 }
 792
 793 int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
 794 {
 795     int        i, j, oldnatoms, ndel;
 796     t_resinfo* ri;
 797
 798     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
 799     oldnatoms = pdba->nr;
 800     ndel      = 0;
 801     /* NOTE: pdba->nr is modified inside the loop */
 802     for (i = 1; (i < pdba->nr); i++)
 803     {
 804         /* compare 'i' and 'i-1', throw away 'i' if they are identical
 805            this is a 'while' because multiple alternate locations can be present */
 806         while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
 807                && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
 808         {
 809             ndel++;
 810             if (bVerbose)
 811             {
 812                 ri = &pdba->resinfo[pdba->atom[i].resind];
 813                 GMX_LOG(logger.info)
 814                         .asParagraph()
 815                         .appendTextFormatted("deleting duplicate atom %4s  %s%4d%c",
 816                                              *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
 817                 if (ri->chainid && (ri->chainid != ' '))
 818                 {
 819                     printf(" ch %c", ri->chainid);
 820                 }
 821                 if (pdba->pdbinfo)
 822                 {
 823                     if (pdba->pdbinfo[i].atomnr)
 824                     {
 825                         printf("  pdb nr %4d", pdba->pdbinfo[i].atomnr);
 826                     }
 827                     if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
 828                     {
 829                         printf("  altloc %c", pdba->pdbinfo[i].altloc);
 830                     }
 831                 }
 832                 printf("\n");
 833             }
 834             pdba->nr--;
 835             /* We can not free, since it might be in the symtab */
 836             /* sfree(pdba->atomname[i]); */
 837             for (j = i; j < pdba->nr; j++)
 838             {
 839                 pdba->atom[j]     = pdba->atom[j + 1];
 840                 pdba->atomname[j] = pdba->atomname[j + 1];
 841                 if (pdba->pdbinfo)
 842                 {
 843                     pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
 844                 }
 845                 copy_rvec(x[j + 1], x[j]);
 846             }
 847             srenew(pdba->atom, pdba->nr);
 848             /* srenew(pdba->atomname, pdba->nr); */
 849             srenew(pdba->pdbinfo, pdba->nr);
 850         }
 851     }
 852     if (pdba->nr != oldnatoms)
 853     {
 854         GMX_LOG(logger.info)
 855                 .asParagraph()
 856                 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
 857     }
 858
 859     return pdba->nr;
 860 }
 861
 862 void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
 863 {
 864     std::string startResidueString =
 865             gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
 866     std::string endResidueString =
 867             gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
 868
 869     // Check whether all residues in chain have the same chain ID.
 870     bool        allResiduesHaveSameChainID = true;
 871     char        chainID0                   = pdba->resinfo[r0].chainid;
 872     char        chainID;
 873     std::string residueString;
 874
 875     for (int i = r0 + 1; i < r1; i++)
 876     {
 877         chainID = pdba->resinfo[i].chainid;
 878         if (chainID != chainID0)
 879         {
 880             allResiduesHaveSameChainID = false;
 881             residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 882             break;
 883         }
 884     }
 885
 886     if (!allResiduesHaveSameChainID)
 887     {
 888         gmx_fatal(FARGS,
 889                   "The chain covering the range %s--%s does not have a consistent chain ID. "
 890                   "The first residue has ID '%c', while residue %s has ID '%c'.",
 891                   startResidueString.c_str(), endResidueString.c_str(), chainID0,
 892                   residueString.c_str(), chainID);
 893     }
 894
 895     // At this point all residues have the same ID. If they are also non-blank
 896     // we can be a bit more aggressive and require the types match too.
 897     if (chainID0 != ' ')
 898     {
 899         bool        allResiduesHaveSameType = true;
 900         std::string restype;
 901         std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
 902
 903         for (int i = r0 + 1; i < r1; i++)
 904         {
 905             restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
 906             if (!gmx::equalCaseInsensitive(restype, restype0))
 907             {
 908                 allResiduesHaveSameType = false;
 909                 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 910                 break;
 911             }
 912         }
 913
 914         if (!allResiduesHaveSameType)
 915         {
 916             gmx_fatal(FARGS,
 917                       "The residues in the chain %s--%s do not have a consistent type. "
 918                       "The first residue has type '%s', while residue %s is of type '%s'. "
 919                       "Either there is a mistake in your chain, or it includes nonstandard "
 920                       "residue names that have not yet been added to the residuetypes.dat "
 921                       "file in the GROMACS library directory. If there are other molecules "
 922                       "such as ligands, they should not have the same chain ID as the "
 923                       "adjacent protein chain since it's a separate molecule.",
 924                       startResidueString.c_str(), endResidueString.c_str(), restype0.c_str(),
 925                       residueString.c_str(), restype.c_str());
 926         }
 927     }
 928 }
 929
 930 void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
 931 {
 932     int                        i;
 933     std::optional<std::string> startrestype;
 934
 935     *r_start = -1;
 936     *r_end   = -1;
 937
 938     int startWarnings = 0;
 939     int endWarnings   = 0;
 940     int ionNotes      = 0;
 941
 942     // Check that all residues have the same chain identifier, and if it is
 943     // non-blank we also require the residue types to match.
 944     checkResidueTypeSanity(pdba, r0, r1, rt);
 945
 946     // If we return correctly from checkResidueTypeSanity(), the only
 947     // remaining cases where we can have non-matching residue types is if
 948     // the chain ID was blank, which could be the case e.g. for a structure
 949     // read from a GRO file or other file types without chain information.
 950     // In that case we need to be a bit more liberal and detect chains based
 951     // on the residue type.
 952
 953     // If we get here, the chain ID must be identical for all residues
 954     char chainID = pdba->resinfo[r0].chainid;
 955
 956     /* Find the starting terminus (typially N or 5') */
 957     for (i = r0; i < r1 && *r_start == -1; i++)
 958     {
 959         startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
 960         if (!startrestype)
 961         {
 962             continue;
 963         }
 964         if (gmx::equalCaseInsensitive(*startrestype, "Protein")
 965             || gmx::equalCaseInsensitive(*startrestype, "DNA")
 966             || gmx::equalCaseInsensitive(*startrestype, "RNA"))
 967         {
 968             GMX_LOG(logger.info)
 969                     .asParagraph()
 970                     .appendTextFormatted("Identified residue %s%d as a starting terminus.",
 971                                          *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 972             *r_start = i;
 973         }
 974         else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
 975         {
 976             if (ionNotes < 5)
 977             {
 978                 GMX_LOG(logger.info)
 979                         .asParagraph()
 980                         .appendTextFormatted(
 981                                 "Residue %s%d has type 'Ion', assuming it is not linked into a "
 982                                 "chain.",
 983                                 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 984             }
 985             if (ionNotes == 4)
 986             {
 987                 GMX_LOG(logger.info)
 988                         .asParagraph()
 989                         .appendTextFormatted("Disabling further notes about ions.");
 990             }
 991             ionNotes++;
 992         }
 993         else
 994         {
 995             // Either no known residue type, or one not needing special handling
 996             if (startWarnings < 5)
 997             {
 998                 if (chainID == ' ')
 999                 {
1000                     GMX_LOG(logger.warning)
1001                             .asParagraph()
1002                             .appendTextFormatted(
1003                                     "Starting residue %s%d in chain not identified as "
1004                                     "Protein/RNA/DNA. "
1005                                     "This chain lacks identifiers, which makes it impossible to do "
1006                                     "strict "
1007                                     "classification of the start/end residues. Here we need to "
1008                                     "guess this residue "
1009                                     "should not be part of the chain and instead introduce a "
1010                                     "break, but that will "
1011                                     "be catastrophic if they should in fact be linked. Please "
1012                                     "check your structure, "
1013                                     "and add %s to residuetypes.dat if this was not correct.",
1014                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr, *pdba->resinfo[i].name);
1015                 }
1016                 else
1017                 {
1018                     GMX_LOG(logger.warning)
1019                             .asParagraph()
1020                             .appendTextFormatted(
1021                                     "No residues in chain starting at %s%d identified as "
1022                                     "Protein/RNA/DNA. "
1023                                     "This makes it impossible to link them into a molecule, which "
1024                                     "could either be "
1025                                     "correct or a catastrophic error. Please check your structure, "
1026                                     "and add all "
1027                                     "necessary residue names to residuetypes.dat if this was not "
1028                                     "correct.",
1029                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1030                 }
1031             }
1032             if (startWarnings == 4)
1033             {
1034                 GMX_LOG(logger.warning)
1035                         .asParagraph()
1036                         .appendTextFormatted(
1037                                 "Disabling further warnings about unidentified residues at start "
1038                                 "of chain.");
1039             }
1040             startWarnings++;
1041         }
1042     }
1043
1044     if (*r_start >= 0)
1045     {
1046         /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1047         for (int i = *r_start; i < r1; i++)
1048         {
1049             std::optional<std::string> restype =
1050                     rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1051             if (!restype)
1052             {
1053                 continue;
1054             }
1055             if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
1056             {
1057                 *r_end = i;
1058             }
1059             else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1060             {
1061                 if (ionNotes < 5)
1062                 {
1063                     GMX_LOG(logger.info)
1064                             .asParagraph()
1065                             .appendTextFormatted(
1066                                     "Residue %s%d has type 'Ion', assuming it is not linked into a "
1067                                     "chain.",
1068                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1069                 }
1070                 if (ionNotes == 4)
1071                 {
1072                     GMX_LOG(logger.info)
1073                             .asParagraph()
1074                             .appendTextFormatted("Disabling further notes about ions.");
1075                 }
1076                 ionNotes++;
1077             }
1078             else
1079             {
1080                 // Either no known residue type, or one not needing special handling.
1081                 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1082                 // Otherwise the call to checkResidueTypeSanity() will
1083                 // have caught the problem.
1084                 if (endWarnings < 5)
1085                 {
1086                     GMX_LOG(logger.warning)
1087                             .asParagraph()
1088                             .appendTextFormatted(
1089                                     "Residue %s%d in chain has different type ('%s') from "
1090                                     "residue %s%d ('%s'). This chain lacks identifiers, which "
1091                                     "makes "
1092                                     "it impossible to do strict classification of the start/end "
1093                                     "residues. Here we "
1094                                     "need to guess this residue should not be part of the chain "
1095                                     "and instead "
1096                                     "introduce a break, but that will be catastrophic if they "
1097                                     "should in fact be "
1098                                     "linked. Please check your structure, and add %s to "
1099                                     "residuetypes.dat "
1100                                     "if this was not correct.",
1101                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr, restype->c_str(),
1102                                     *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr,
1103                                     startrestype->c_str(), *pdba->resinfo[i].name);
1104                 }
1105                 if (endWarnings == 4)
1106                 {
1107                     GMX_LOG(logger.warning)
1108                             .asParagraph()
1109                             .appendTextFormatted(
1110                                     "Disabling further warnings about unidentified residues at end "
1111                                     "of chain.");
1112                 }
1113                 endWarnings++;
1114             }
1115         }
1116     }
1117
1118     if (*r_end >= 0)
1119     {
1120         GMX_LOG(logger.info)
1121                 .asParagraph()
1122                 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1123                                      *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
1124     }
1125 }
1126
1127 enum class ChainSeparationType : int
1128 {
1129     IdOrTer,
1130     IdAndTer,
1131     Ter,
1132     Id,
1133     Interactive,
1134     Count
1135 };
1136 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1137     { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1138 };
1139 const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1140     { "Splitting chemical chains based on TER records or chain id changing.\n",
1141       "Splitting chemical chains based on TER records and chain id changing.\n",
1142       "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1143       "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1144       "Splitting chemical chains interactively.\n" }
1145 };
1146
1147 void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1148 {
1149     int         i;
1150     char        old_prev_chainid;
1151     char        old_this_chainid;
1152     int         old_prev_chainnum;
1153     int         old_this_chainnum;
1154     t_resinfo*  ri;
1155     char        select[STRLEN];
1156     int         new_chainnum;
1157     int         this_atomnum;
1158     int         prev_atomnum;
1159     const char* prev_atomname;
1160     const char* this_atomname;
1161     const char* prev_resname;
1162     const char* this_resname;
1163     int         prev_resnum;
1164     int         this_resnum;
1165     char        prev_chainid;
1166     char        this_chainid;
1167
1168     /* The default chain enumeration is based on TER records only */
1169     printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1170
1171     old_prev_chainid  = '?';
1172     old_prev_chainnum = -1;
1173     new_chainnum      = -1;
1174
1175     this_atomname = nullptr;
1176     this_atomnum  = -1;
1177     this_resname  = nullptr;
1178     this_resnum   = -1;
1179     this_chainid  = '?';
1180
1181     for (i = 0; i < pdba->nres; i++)
1182     {
1183         ri                = &pdba->resinfo[i];
1184         old_this_chainid  = ri->chainid;
1185         old_this_chainnum = ri->chainnum;
1186
1187         prev_atomname = this_atomname;
1188         prev_atomnum  = this_atomnum;
1189         prev_resname  = this_resname;
1190         prev_resnum   = this_resnum;
1191         prev_chainid  = this_chainid;
1192
1193         this_atomname = *(pdba->atomname[i]);
1194         this_atomnum  = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1195         this_resname  = *ri->name;
1196         this_resnum   = ri->nr;
1197         this_chainid  = ri->chainid;
1198
1199         switch (chainSeparation)
1200         {
1201             case ChainSeparationType::IdOrTer:
1202                 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1203                 {
1204                     new_chainnum++;
1205                 }
1206                 break;
1207
1208             case ChainSeparationType::IdAndTer:
1209                 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1210                 {
1211                     new_chainnum++;
1212                 }
1213                 break;
1214
1215             case ChainSeparationType::Id:
1216                 if (old_this_chainid != old_prev_chainid)
1217                 {
1218                     new_chainnum++;
1219                 }
1220                 break;
1221
1222             case ChainSeparationType::Ter:
1223                 if (old_this_chainnum != old_prev_chainnum)
1224                 {
1225                     new_chainnum++;
1226                 }
1227                 break;
1228             case ChainSeparationType::Interactive:
1229                 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1230                 {
1231                     if (i > 0)
1232                     {
1233                         GMX_LOG(logger.info)
1234                                 .asParagraph()
1235                                 .appendTextFormatted(
1236                                         "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1237 "
1238                                         "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1239                                         prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1240                                         prev_atomname, this_resname, this_resnum, this_chainid,
1241                                         this_atomnum, this_atomname);
1242
1243                         if (nullptr == fgets(select, STRLEN - 1, stdin))
1244                         {
1245                             gmx_fatal(FARGS, "Error reading from stdin");
1246                         }
1247                     }
1248                     if (i == 0 || select[0] == 'y')
1249                     {
1250                         new_chainnum++;
1251                     }
1252                 }
1253                 break;
1254             case ChainSeparationType::Count:
1255                 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1256         }
1257         old_prev_chainid  = old_this_chainid;
1258         old_prev_chainnum = old_this_chainnum;
1259
1260         ri->chainnum = new_chainnum;
1261     }
1262 }
1263
1264 bool checkChainCyclicity(t_atoms*                               pdba,
1265                          rvec*                                  pdbx,
1266                          int                                    start_ter,
1267                          int                                    end_ter,
1268                          gmx::ArrayRef<const PreprocessResidue> rtpFFDB,
1269                          gmx::ArrayRef<const RtpRename>         rr,
1270                          real                                   long_bond_dist_,
1271                          real                                   short_bond_dist_)
1272 {
1273     int         ai = -1, aj = -1;
1274     char*       rtpname = *(pdba->resinfo[start_ter].rtp);
1275     std::string newName = search_resrename(rr, rtpname, false, false, false);
1276     if (newName.empty())
1277     {
1278         newName = rtpname;
1279     }
1280     auto        res = getDatabaseEntry(newName, rtpFFDB);
1281     const char *name_ai, *name_aj;
1282
1283     for (const auto& patch : res->rb[ebtsBONDS].b)
1284     { /* Search backward bond for n/5' terminus */
1285         name_ai = patch.ai().c_str();
1286         name_aj = patch.aj().c_str();
1287         if (name_ai[0] == '-')
1288         {
1289             aj = search_res_atom(++name_ai, end_ter, pdba, "check", TRUE);
1290             ai = search_res_atom(name_aj, start_ter, pdba, "check", TRUE);
1291         }
1292         else if (name_aj[0] == '-')
1293         {
1294             aj = search_res_atom(++name_aj, end_ter, pdba, "check", TRUE);
1295             ai = search_res_atom(name_ai, start_ter, pdba, "check", TRUE);
1296         }
1297         if (ai >= 0 && aj >= 0)
1298         {
1299             break; /* Found */
1300         }
1301     }
1302
1303     if (!(ai >= 0 && aj >= 0))
1304     {
1305         rtpname = *(pdba->resinfo[end_ter].rtp);
1306         newName = search_resrename(rr, rtpname, false, false, false);
1307         if (newName.empty())
1308         {
1309             newName = rtpname;
1310         }
1311         res = getDatabaseEntry(newName, rtpFFDB);
1312         for (const auto& patch : res->rb[ebtsBONDS].b)
1313         {
1314             /* Seach forward bond for c/3' terminus */
1315             name_ai = patch.ai().c_str();
1316             name_aj = patch.aj().c_str();
1317
1318             if (name_ai[0] == '+')
1319             {
1320                 ai = search_res_atom(name_aj, end_ter, pdba, "check", TRUE);
1321                 aj = search_res_atom(++name_ai, start_ter, pdba, "check", TRUE);
1322             }
1323             else if (name_aj[0] == '+')
1324             {
1325                 ai = search_res_atom(name_ai, end_ter, pdba, "check", TRUE);
1326                 aj = search_res_atom(++name_aj, start_ter, pdba, "check", TRUE);
1327             }
1328             if (ai >= 0 && aj >= 0)
1329             {
1330                 break;
1331             }
1332         }
1333     }
1334
1335     if (ai >= 0 && aj >= 0)
1336     {
1337         real dist = distance2(pdbx[ai], pdbx[aj]);
1338         /* it is better to read bond length from ffbonded.itp */
1339         return (dist < gmx::square(long_bond_dist_) && dist > gmx::square(short_bond_dist_));
1340     }
1341     else
1342     {
1343         return false;
1344     }
1345 }
1346
1347 struct t_pdbchain
1348 {
1349     char             chainid   = ' ';
1350     char             chainnum  = ' ';
1351     int              start     = -1;
1352     int              natom     = -1;
1353     bool             bAllWat   = false;
1354     int              nterpairs = -1;
1355     std::vector<int> chainstart;
1356 };
1357
1358 struct t_chain
1359 {
1360     char                                chainid   = ' ';
1361     int                                 chainnum  = ' ';
1362     bool                                bAllWat   = false;
1363     int                                 nterpairs = -1;
1364     std::vector<int>                    chainstart;
1365     std::vector<MoleculePatchDatabase*> ntdb;
1366     std::vector<MoleculePatchDatabase*> ctdb;
1367     std::vector<int>                    r_start;
1368     std::vector<int>                    r_end;
1369     t_atoms*                            pdba;
1370     std::vector<gmx::RVec>              x;
1371     std::vector<int>                    cyclicBondsIndex;
1372 };
1373
1374 enum class VSitesType : int
1375 {
1376     None,
1377     Hydrogens,
1378     Aromatics,
1379     Count
1380 };
1381 const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = { { "none", "hydrogens",
1382                                                                              "aromatics" } };
1383
1384 enum class WaterType : int
1385 {
1386     Select,
1387     None,
1388     Spc,
1389     SpcE,
1390     Tip3p,
1391     Tip4p,
1392     Tip5p,
1393     Tips3p,
1394     Count
1395 };
1396 const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1397     { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1398 };
1399
1400 enum class MergeType : int
1401 {
1402     No,
1403     All,
1404     Interactive,
1405     Count
1406 };
1407 const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = { { "no", "all",
1408                                                                            "interactive" } };
1409
1410 } // namespace
1411
1412 namespace gmx
1413 {
1414
1415 namespace
1416 {
1417
1418 class pdb2gmx : public ICommandLineOptionsModule
1419 {
1420 public:
1421     pdb2gmx() :
1422         bVsites_(FALSE),
1423         bPrevWat_(FALSE),
1424         bVsiteAromatics_(FALSE),
1425         chainSeparation_(ChainSeparationType::IdOrTer),
1426         vsitesType_(VSitesType::None),
1427         waterType_(WaterType::Select),
1428         mergeType_(MergeType::No),
1429         itp_file_(nullptr),
1430         mHmult_(0)
1431     {
1432         gmx::LoggerBuilder builder;
1433         builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1434         builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1435         loggerOwner_ = std::make_unique<LoggerOwner>(builder.build());
1436     }
1437
1438     // From ICommandLineOptionsModule
1439     void init(CommandLineModuleSettings* /*settings*/) override {}
1440
1441     void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1442
1443     void optionsFinished() override;
1444
1445     int run() override;
1446
1447 private:
1448     bool bNewRTP_;
1449     bool bInter_;
1450     bool bCysMan_;
1451     bool bLysMan_;
1452     bool bAspMan_;
1453     bool bGluMan_;
1454     bool bHisMan_;
1455     bool bGlnMan_;
1456     bool bArgMan_;
1457     bool bTerMan_;
1458     bool bUnA_;
1459     bool bHeavyH_;
1460     bool bSort_;
1461     bool bAllowMissing_;
1462     bool bRemoveH_;
1463     bool bDeuterate_;
1464     bool bVerbose_;
1465     bool bChargeGroups_;
1466     bool bCmap_;
1467     bool bRenumRes_;
1468     bool bRTPresname_;
1469     bool bIndexSet_;
1470     bool bOutputSet_;
1471     bool bVsites_;
1472     bool bWat_;
1473     bool bPrevWat_;
1474     bool bITP_;
1475     bool bVsiteAromatics_;
1476     real angle_;
1477     real distance_;
1478     real posre_fc_;
1479     real long_bond_dist_;
1480     real short_bond_dist_;
1481
1482     std::string indexOutputFile_;
1483     std::string outputFile_;
1484     std::string topologyFile_;
1485     std::string includeTopologyFile_;
1486     std::string outputConfFile_;
1487     std::string inputConfFile_;
1488     std::string outFile_;
1489     std::string ff_;
1490
1491     ChainSeparationType chainSeparation_;
1492     VSitesType          vsitesType_;
1493     WaterType           waterType_;
1494     MergeType           mergeType_;
1495
1496     FILE*                        itp_file_;
1497     char                         forcefield_[STRLEN];
1498     char                         ffdir_[STRLEN];
1499     char*                        ffname_;
1500     char*                        watermodel_;
1501     std::vector<std::string>     incls_;
1502     std::vector<t_mols>          mols_;
1503     real                         mHmult_;
1504     std::unique_ptr<LoggerOwner> loggerOwner_;
1505 };
1506
1507 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1508 {
1509     const char* desc[] = {
1510         "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1511         "some database files, adds hydrogens to the molecules and generates",
1512         "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1513         "and a topology in GROMACS format.",
1514         "These files can subsequently be processed to generate a run input file.",
1515         "[PAR]",
1516         "[THISMODULE] will search for force fields by looking for",
1517         "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1518         "of the current working directory and of the GROMACS library directory",
1519         "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1520         "variable.",
1521         "By default the forcefield selection is interactive,",
1522         "but you can use the [TT]-ff[tt] option to specify one of the short names",
1523         "in the list on the command line instead. In that case [THISMODULE] just looks",
1524         "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1525         "[PAR]",
1526         "After choosing a force field, all files will be read only from",
1527         "the corresponding force field directory.",
1528         "If you want to modify or add a residue types, you can copy the force",
1529         "field directory from the GROMACS library directory to your current",
1530         "working directory. If you want to add new protein residue types,",
1531         "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1532         "or copy the whole library directory to a local directory and set",
1533         "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1534         "Check Chapter 5 of the manual for more information about file formats.",
1535         "[PAR]",
1536
1537         "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1538         "need not necessarily contain a protein structure. Every kind of",
1539         "molecule for which there is support in the database can be converted.",
1540         "If there is no support in the database, you can add it yourself.[PAR]",
1541
1542         "The program has limited intelligence, it reads a number of database",
1543         "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1544         "if necessary this can be done manually. The program can prompt the",
1545         "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1546         "desired. For Lys the choice is between neutral (two protons on NZ) or",
1547         "protonated (three protons, default), for Asp and Glu unprotonated",
1548         "(default) or protonated, for His the proton can be either on ND1,",
1549         "on NE2 or on both. By default these selections are done automatically.",
1550         "For His, this is based on an optimal hydrogen bonding",
1551         "conformation. Hydrogen bonds are defined based on a simple geometric",
1552         "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1553         "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1554         "[TT]-dist[tt] respectively.[PAR]",
1555
1556         "The protonation state of N- and C-termini can be chosen interactively",
1557         "with the [TT]-ter[tt] flag.  Default termini are ionized (NH3+ and COO-),",
1558         "respectively.  Some force fields support zwitterionic forms for chains of",
1559         "one residue, but for polypeptides these options should NOT be selected.",
1560         "The AMBER force fields have unique forms for the terminal residues,",
1561         "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1562         "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1563         "respectively to use these forms, making sure you preserve the format",
1564         "of the coordinate file. Alternatively, use named terminating residues",
1565         "(e.g. ACE, NME).[PAR]",
1566
1567         "The separation of chains is not entirely trivial since the markup",
1568         "in user-generated PDB files frequently varies and sometimes it",
1569         "is desirable to merge entries across a TER record, for instance",
1570         "if you want a disulfide bridge or distance restraints between",
1571         "two protein chains or if you have a HEME group bound to a protein.",
1572         "In such cases multiple chains should be contained in a single",
1573         "[TT]moleculetype[tt] definition.",
1574         "To handle this, [THISMODULE] uses two separate options.",
1575         "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1576         "start, and termini added when applicable. This can be done based on the",
1577         "existence of TER records, when the chain id changes, or combinations of either",
1578         "or both of these. You can also do the selection fully interactively.",
1579         "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1580         "are merged into one moleculetype, after adding all the chemical termini (or not).",
1581         "This can be turned off (no merging), all non-water chains can be merged into a",
1582         "single molecule, or the selection can be done interactively.[PAR]",
1583
1584         "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1585         "If any of the occupancies are not one, indicating that the atom is",
1586         "not resolved well in the structure, a warning message is issued.",
1587         "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1588         "all occupancy fields may be zero. Either way, it is up to the user",
1589         "to verify the correctness of the input data (read the article!).[PAR]",
1590
1591         "During processing the atoms will be reordered according to GROMACS",
1592         "conventions. With [TT]-n[tt] an index file can be generated that",
1593         "contains one group reordered in the same way. This allows you to",
1594         "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1595         "one limitation: reordering is done after the hydrogens are stripped",
1596         "from the input and before new hydrogens are added. This means that",
1597         "you should not use [TT]-ignh[tt].[PAR]",
1598
1599         "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1600         "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1601         "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1602         "[PAR]",
1603
1604         "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1605         "motions. Angular and out-of-plane motions can be removed by changing",
1606         "hydrogens into virtual sites and fixing angles, which fixes their",
1607         "position relative to neighboring atoms. Additionally, all atoms in the",
1608         "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1609         "can be converted into virtual sites, eliminating the fast improper dihedral",
1610         "fluctuations in these rings (but this feature is deprecated).",
1611         "[BB]Note[bb] that in this case all other hydrogen",
1612         "atoms are also converted to virtual sites. The mass of all atoms that are",
1613         "converted into virtual sites, is added to the heavy atoms.[PAR]",
1614         "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1615         "done by increasing the hydrogen-mass by a factor of 4. This is also",
1616         "done for water hydrogens to slow down the rotational motion of water.",
1617         "The increase in mass of the hydrogens is subtracted from the bonded",
1618         "(heavy) atom so that the total mass of the system remains the same.",
1619
1620         "As a special case, ring-closed (or cyclic) molecules are considered.",
1621         "[THISMODULE] automatically determines if a cyclic molecule is present",
1622         "by evaluating the distance between the terminal atoms of a given chain.",
1623         "If this distance is greater than the [TT]-sb[tt]",
1624         "(\"Short bond warning distance\", default 0.05 nm)",
1625         "and less than the [TT]-lb[tt] (\"Long bond warning distance\", default 0.25 nm)",
1626         "the molecule is considered to be ring closed and will be processed as such.",
1627         "Please note that this does not detect cyclic bonds over periodic boundaries."
1628     };
1629
1630     settings->setHelpText(desc);
1631
1632     options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1633             "Write the residue database in new format to [TT]new.rtp[tt]"));
1634     options->addOption(
1635             RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1636     options->addOption(
1637             RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1638     options->addOption(EnumOption<ChainSeparationType>("chainsep")
1639                                .enumValue(c_chainSeparationTypeNames)
1640                                .store(&chainSeparation_)
1641                                .description("Condition in PDB files when a new chain should be "
1642                                             "started (adding termini)"));
1643     options->addOption(EnumOption<MergeType>("merge")
1644                                .enumValue(c_mergeTypeNames)
1645                                .store(&mergeType_)
1646                                .description("Merge multiple chains into a single [moleculetype]"));
1647     options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1648             "Force field, interactive by default. Use [TT]-h[tt] for information."));
1649     options->addOption(
1650             EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1651     options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1652             "Set the next 8 options to interactive"));
1653     options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1654             "Interactive SS bridge selection"));
1655     options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1656             "Interactive termini selection, instead of charged (default)"));
1657     options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1658             "Interactive lysine selection, instead of charged"));
1659     options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1660             "Interactive arginine selection, instead of charged"));
1661     options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1662             "Interactive aspartic acid selection, instead of charged"));
1663     options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1664             "Interactive glutamic acid selection, instead of charged"));
1665     options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1666             "Interactive glutamine selection, instead of charged"));
1667     options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1668             "Interactive histidine selection, instead of checking H-bonds"));
1669     options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1670             "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1671     options->addOption(
1672             RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1673     options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1674             "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1675             "tyrosine"));
1676     options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1677             "Sort the residues according to database, turning this off is dangerous as charge "
1678             "groups might be broken in parts"));
1679     options->addOption(
1680             BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1681     options->addOption(
1682             BooleanOption("missing")
1683                     .store(&bAllowMissing_)
1684                     .defaultValue(false)
1685                     .description(
1686                             "Continue when atoms are missing and bonds cannot be made, dangerous"));
1687     options->addOption(
1688             BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1689     options->addOption(
1690             RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1691     options->addOption(EnumOption<VSitesType>("vsite")
1692                                .store(&vsitesType_)
1693                                .enumValue(c_vsitesTypeNames)
1694                                .description("Convert atoms to virtual sites"));
1695     options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1696             "Make hydrogen atoms heavy"));
1697     options->addOption(
1698             BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1699     options->addOption(BooleanOption("chargegrp")
1700                                .store(&bChargeGroups_)
1701                                .defaultValue(true)
1702                                .description("Use charge groups in the [REF].rtp[ref] file"));
1703     options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1704             "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1705     options->addOption(BooleanOption("renum")
1706                                .store(&bRenumRes_)
1707                                .defaultValue(false)
1708                                .description("Renumber the residues consecutively in the output"));
1709     options->addOption(BooleanOption("rtpres")
1710                                .store(&bRTPresname_)
1711                                .defaultValue(false)
1712                                .description("Use [REF].rtp[ref] entry names as residue names"));
1713     options->addOption(FileNameOption("f")
1714                                .legacyType(efSTX)
1715                                .inputFile()
1716                                .store(&inputConfFile_)
1717                                .required()
1718                                .defaultBasename("protein")
1719                                .defaultType(efPDB)
1720                                .description("Structure file"));
1721     options->addOption(FileNameOption("o")
1722                                .legacyType(efSTO)
1723                                .outputFile()
1724                                .store(&outputConfFile_)
1725                                .required()
1726                                .defaultBasename("conf")
1727                                .description("Structure file"));
1728     options->addOption(FileNameOption("p")
1729                                .legacyType(efTOP)
1730                                .outputFile()
1731                                .store(&topologyFile_)
1732                                .required()
1733                                .defaultBasename("topol")
1734                                .description("Topology file"));
1735     options->addOption(FileNameOption("i")
1736                                .legacyType(efITP)
1737                                .outputFile()
1738                                .store(&includeTopologyFile_)
1739                                .required()
1740                                .defaultBasename("posre")
1741                                .description("Include file for topology"));
1742     options->addOption(FileNameOption("n")
1743                                .legacyType(efNDX)
1744                                .outputFile()
1745                                .store(&indexOutputFile_)
1746                                .storeIsSet(&bIndexSet_)
1747                                .defaultBasename("index")
1748                                .description("Index file"));
1749     options->addOption(FileNameOption("q")
1750                                .legacyType(efSTO)
1751                                .outputFile()
1752                                .store(&outFile_)
1753                                .storeIsSet(&bOutputSet_)
1754                                .defaultBasename("clean")
1755                                .defaultType(efPDB)
1756                                .description("Structure file"));
1757 }
1758
1759 void pdb2gmx::optionsFinished()
1760 {
1761     if (inputConfFile_.empty())
1762     {
1763         GMX_THROW(InconsistentInputError("You must supply an input file"));
1764     }
1765     if (bInter_)
1766     {
1767         /* if anything changes here, also change description of -inter */
1768         bCysMan_ = true;
1769         bTerMan_ = true;
1770         bLysMan_ = true;
1771         bArgMan_ = true;
1772         bAspMan_ = true;
1773         bGluMan_ = true;
1774         bGlnMan_ = true;
1775         bHisMan_ = true;
1776     }
1777
1778     if (bHeavyH_)
1779     {
1780         mHmult_ = 4.0;
1781     }
1782     else if (bDeuterate_)
1783     {
1784         mHmult_ = 2.0;
1785     }
1786     else
1787     {
1788         mHmult_ = 1.0;
1789     }
1790
1791     /* Force field selection, interactive or direct */
1792     choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(), forcefield_,
1793               sizeof(forcefield_), ffdir_, sizeof(ffdir_), loggerOwner_->logger());
1794
1795     if (strlen(forcefield_) > 0)
1796     {
1797         ffname_    = forcefield_;
1798         ffname_[0] = std::toupper(ffname_[0]);
1799     }
1800     else
1801     {
1802         gmx_fatal(FARGS, "Empty forcefield string");
1803     }
1804 }
1805
1806 int pdb2gmx::run()
1807 {
1808     char                       select[STRLEN];
1809     std::vector<DisulfideBond> ssbonds;
1810
1811     int         this_atomnum;
1812     int         prev_atomnum;
1813     const char* prev_atomname;
1814     const char* this_atomname;
1815     const char* prev_resname;
1816     const char* this_resname;
1817     int         prev_resnum;
1818     int         this_resnum;
1819     char        prev_chainid;
1820     char        this_chainid;
1821     int         prev_chainnumber;
1822     int         this_chainnumber;
1823     int         this_chainstart;
1824     int         prev_chainstart;
1825
1826     const gmx::MDLogger& logger = loggerOwner_->logger();
1827
1828     GMX_LOG(logger.info)
1829             .asParagraph()
1830             .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1831
1832     choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
1833
1834     switch (vsitesType_)
1835     {
1836         case VSitesType::None:
1837             bVsites_         = false;
1838             bVsiteAromatics_ = false;
1839             break;
1840         case VSitesType::Hydrogens:
1841             bVsites_         = true;
1842             bVsiteAromatics_ = false;
1843             break;
1844         case VSitesType::Aromatics:
1845             bVsites_         = true;
1846             bVsiteAromatics_ = true;
1847             break;
1848         case VSitesType::Count:
1849             gmx_fatal(FARGS, "Internal inconsistency: VSitesType is out of range.");
1850     } /* end switch */
1851
1852     /* Open the symbol table */
1853     t_symtab symtab;
1854     open_symtab(&symtab);
1855
1856     /* Residue type database */
1857     ResidueType rt;
1858
1859     /* Read residue renaming database(s), if present */
1860     std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
1861
1862     std::vector<RtpRename> rtprename;
1863     for (const auto& filename : rrn)
1864     {
1865         GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
1866         FILE* fp = fflib_open(filename);
1867         read_rtprename(filename.c_str(), fp, &rtprename);
1868         gmx_ffclose(fp);
1869     }
1870
1871     /* Add all alternative names from the residue renaming database to the list
1872        of recognized amino/nucleic acids. */
1873     for (const auto& rename : rtprename)
1874     {
1875         /* Only add names if the 'standard' gromacs/iupac base name was found */
1876         if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
1877         {
1878             rt.addResidue(rename.main, *restype);
1879             rt.addResidue(rename.nter, *restype);
1880             rt.addResidue(rename.cter, *restype);
1881             rt.addResidue(rename.bter, *restype);
1882         }
1883     }
1884
1885     matrix      box;
1886     const char* watres;
1887     clear_mat(box);
1888     if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
1889     {
1890         watres = "HO4";
1891     }
1892     else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
1893     {
1894         watres = "HO5";
1895     }
1896     else
1897     {
1898         watres = "HOH";
1899     }
1900
1901     AtomProperties aps;
1902     char*          title = nullptr;
1903     PbcType        pbcType;
1904     t_atoms        pdba_all;
1905     rvec*          pdbx;
1906     int natom = read_pdball(inputConfFile_.c_str(), bOutputSet_, outFile_.c_str(), &title, &pdba_all,
1907                             &pdbx, &pbcType, box, bRemoveH_, &symtab, &rt, watres, &aps, bVerbose_);
1908
1909     if (natom == 0)
1910     {
1911         std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
1912         GMX_THROW(InconsistentInputError(message));
1913     }
1914
1915     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
1916     int nwaterchain = 0;
1917
1918     modify_chain_numbers(&pdba_all, chainSeparation_, logger);
1919
1920     int nchainmerges = 0;
1921
1922     this_atomname    = nullptr;
1923     this_atomnum     = -1;
1924     this_resname     = nullptr;
1925     this_resnum      = -1;
1926     this_chainid     = '?';
1927     this_chainnumber = -1;
1928     this_chainstart  = 0;
1929     /* Keep the compiler happy */
1930     prev_chainstart = 0;
1931
1932     int                     numChains = 0;
1933     std::vector<t_pdbchain> pdb_ch;
1934
1935     t_resinfo* ri;
1936     bool       bMerged = false;
1937     for (int i = 0; (i < natom); i++)
1938     {
1939         ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1940
1941         /* TODO this should live in a helper object, and consolidate
1942            that with code in modify_chain_numbers */
1943         prev_atomname    = this_atomname;
1944         prev_atomnum     = this_atomnum;
1945         prev_resname     = this_resname;
1946         prev_resnum      = this_resnum;
1947         prev_chainid     = this_chainid;
1948         prev_chainnumber = this_chainnumber;
1949         if (!bMerged)
1950         {
1951             prev_chainstart = this_chainstart;
1952         }
1953
1954         this_atomname    = *pdba_all.atomname[i];
1955         this_atomnum     = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
1956         this_resname     = *ri->name;
1957         this_resnum      = ri->nr;
1958         this_chainid     = ri->chainid;
1959         this_chainnumber = ri->chainnum;
1960
1961         bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
1962
1963         if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
1964         {
1965             GMX_RELEASE_ASSERT(
1966                     pdba_all.pdbinfo,
1967                     "Must have pdbinfo from reading a PDB file if chain number is changing");
1968             this_chainstart = pdba_all.atom[i].resind;
1969             bMerged         = false;
1970             if (i > 0 && !bWat_)
1971             {
1972                 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
1973                 {
1974                     GMX_LOG(logger.info)
1975                             .asParagraph()
1976                             .appendTextFormatted(
1977                                     "Merge chain ending with residue %s%d (chain id '%c', atom %d "
1978                                     "%s) and chain starting with "
1979                                     "residue %s%d (chain id '%c', atom %d %s) into a single "
1980                                     "moleculetype (keeping termini)? [n/y]",
1981                                     prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1982                                     prev_atomname, this_resname, this_resnum, this_chainid,
1983                                     this_atomnum, this_atomname);
1984
1985                     if (nullptr == fgets(select, STRLEN - 1, stdin))
1986                     {
1987                         gmx_fatal(FARGS, "Error reading from stdin");
1988                     }
1989                     bMerged = (select[0] == 'y');
1990                 }
1991                 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
1992                 {
1993                     bMerged = true;
1994                 }
1995             }
1996
1997             if (bMerged)
1998             {
1999                 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
2000                         pdba_all.atom[i].resind - prev_chainstart;
2001                 pdb_ch[numChains - 1].nterpairs++;
2002                 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
2003                 nchainmerges++;
2004             }
2005             else
2006             {
2007                 /* set natom for previous chain */
2008                 if (numChains > 0)
2009                 {
2010                     pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
2011                 }
2012                 if (bWat_)
2013                 {
2014                     nwaterchain++;
2015                     ri->chainid = ' ';
2016                 }
2017                 /* check if chain identifier was used before */
2018                 for (int j = 0; (j < numChains); j++)
2019                 {
2020                     if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
2021                     {
2022                         GMX_LOG(logger.warning)
2023                                 .asParagraph()
2024                                 .appendTextFormatted(
2025                                         "Chain identifier '%c' is used in two non-sequential "
2026                                         "blocks. "
2027                                         "They will be treated as separate chains unless you "
2028                                         "reorder your file.",
2029                                         ri->chainid);
2030                     }
2031                 }
2032                 t_pdbchain newChain;
2033                 newChain.chainid  = ri->chainid;
2034                 newChain.chainnum = ri->chainnum;
2035                 newChain.start    = i;
2036                 newChain.bAllWat  = bWat_;
2037                 if (bWat_)
2038                 {
2039                     newChain.nterpairs = 0;
2040                 }
2041                 else
2042                 {
2043                     newChain.nterpairs = 1;
2044                 }
2045                 newChain.chainstart.resize(newChain.nterpairs + 1);
2046                 /* modified [numChains] to [0] below */
2047                 newChain.chainstart[0] = 0;
2048                 pdb_ch.push_back(newChain);
2049                 numChains++;
2050             }
2051         }
2052         bPrevWat_ = bWat_;
2053     }
2054     pdb_ch.back().natom = natom - pdb_ch.back().start;
2055
2056     /* set all the water blocks at the end of the chain */
2057     std::vector<int> swap_index(numChains);
2058     int              j = 0;
2059     for (int i = 0; i < numChains; i++)
2060     {
2061         if (!pdb_ch[i].bAllWat)
2062         {
2063             swap_index[j] = i;
2064             j++;
2065         }
2066     }
2067     for (int i = 0; i < numChains; i++)
2068     {
2069         if (pdb_ch[i].bAllWat)
2070         {
2071             swap_index[j] = i;
2072             j++;
2073         }
2074     }
2075     if (nwaterchain > 1)
2076     {
2077         GMX_LOG(logger.info)
2078                 .asParagraph()
2079                 .appendTextFormatted("Moved all the water blocks to the end");
2080     }
2081
2082     t_atoms*             pdba;
2083     std::vector<t_chain> chains(numChains);
2084     /* copy pdb data and x for all chains */
2085     for (int i = 0; (i < numChains); i++)
2086     {
2087         int si               = swap_index[i];
2088         chains[i].chainid    = pdb_ch[si].chainid;
2089         chains[i].chainnum   = pdb_ch[si].chainnum;
2090         chains[i].bAllWat    = pdb_ch[si].bAllWat;
2091         chains[i].nterpairs  = pdb_ch[si].nterpairs;
2092         chains[i].chainstart = pdb_ch[si].chainstart;
2093         chains[i].ntdb.clear();
2094         chains[i].ctdb.clear();
2095         chains[i].r_start.resize(pdb_ch[si].nterpairs);
2096         chains[i].r_end.resize(pdb_ch[si].nterpairs);
2097
2098         snew(chains[i].pdba, 1);
2099         init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2100         for (j = 0; j < chains[i].pdba->nr; j++)
2101         {
2102             chains[i].pdba->atom[j]     = pdba_all.atom[pdb_ch[si].start + j];
2103             chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2104             chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2105             chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2106         }
2107         /* Re-index the residues assuming that the indices are continuous */
2108         int k                = chains[i].pdba->atom[0].resind;
2109         int nres             = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2110         chains[i].pdba->nres = nres;
2111         for (int j = 0; j < chains[i].pdba->nr; j++)
2112         {
2113             chains[i].pdba->atom[j].resind -= k;
2114         }
2115         srenew(chains[i].pdba->resinfo, nres);
2116         for (int j = 0; j < nres; j++)
2117         {
2118             chains[i].pdba->resinfo[j]      = pdba_all.resinfo[k + j];
2119             chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2120             /* make all chain identifiers equal to that of the chain */
2121             chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2122         }
2123     }
2124
2125     if (nchainmerges > 0)
2126     {
2127         GMX_LOG(logger.info)
2128                 .asParagraph()
2129                 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2130                                      nchainmerges);
2131     }
2132
2133     GMX_LOG(logger.info)
2134             .asParagraph()
2135             .appendTextFormatted(
2136                     "There are %d chains and %d blocks of water and "
2137                     "%d residues with %d atoms",
2138                     numChains - nwaterchain, nwaterchain, pdba_all.nres, natom);
2139
2140     GMX_LOG(logger.info)
2141             .asParagraph()
2142             .appendTextFormatted("  %5s  %4s %6s", "chain", "#res", "#atoms");
2143     for (int i = 0; (i < numChains); i++)
2144     {
2145         GMX_LOG(logger.info)
2146                 .asParagraph()
2147                 .appendTextFormatted("  %d '%c' %5d %6d  %s\n", i + 1,
2148                                      chains[i].chainid ? chains[i].chainid : '-', chains[i].pdba->nres,
2149                                      chains[i].pdba->nr, chains[i].bAllWat ? "(only water)" : "");
2150     }
2151
2152     check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2153
2154     /* Read atomtypes... */
2155     PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2156
2157     /* read residue database */
2158     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2159     std::vector<std::string>       rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2160     std::vector<PreprocessResidue> rtpFFDB;
2161     for (const auto& filename : rtpf)
2162     {
2163         readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2164     }
2165     if (bNewRTP_)
2166     {
2167         /* Not correct with multiple rtp input files with different bonded types */
2168         FILE* fp = gmx_fio_fopen("new.rtp", "w");
2169         print_resall(fp, rtpFFDB, atype);
2170         gmx_fio_fclose(fp);
2171     }
2172
2173     /* read hydrogen database */
2174     std::vector<MoleculePatchDatabase> ah;
2175     read_h_db(ffdir_, &ah);
2176
2177     /* Read Termini database... */
2178     std::vector<MoleculePatchDatabase>  ntdb;
2179     std::vector<MoleculePatchDatabase>  ctdb;
2180     std::vector<MoleculePatchDatabase*> tdblist;
2181     int                                 nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2182     int                                 nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2183
2184     FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2185
2186     print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2187
2188     t_chain*               cc;
2189     std::vector<gmx::RVec> x;
2190     /* new pdb datastructure for sorting. */
2191     t_atoms** sortAtoms  = nullptr;
2192     t_atoms** localAtoms = nullptr;
2193     snew(sortAtoms, numChains);
2194     snew(localAtoms, numChains);
2195     for (int chain = 0; (chain < numChains); chain++)
2196     {
2197         cc = &(chains[chain]);
2198
2199         /* set pdba, natom and nres to the current chain */
2200         pdba     = cc->pdba;
2201         x        = cc->x;
2202         natom    = cc->pdba->nr;
2203         int nres = cc->pdba->nres;
2204
2205         if (cc->chainid && (cc->chainid != ' '))
2206         {
2207             GMX_LOG(logger.info)
2208                     .asParagraph()
2209                     .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2210                                          chain + 1, cc->chainid, natom, nres);
2211         }
2212         else
2213         {
2214             GMX_LOG(logger.info)
2215                     .asParagraph()
2216                     .appendTextFormatted("Processing chain %d (%d atoms, %d residues)", chain + 1,
2217                                          natom, nres);
2218         }
2219
2220         process_chain(pdba, x, bUnA_, bUnA_, bUnA_, bLysMan_, bAspMan_, bGluMan_, bHisMan_,
2221                       bArgMan_, bGlnMan_, angle_, distance_, &symtab, rtprename);
2222
2223         cc->chainstart[cc->nterpairs] = pdba->nres;
2224         j                             = 0;
2225         for (int i = 0; i < cc->nterpairs; i++)
2226         {
2227             find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]),
2228                         &(cc->r_end[j]), &rt, logger);
2229             if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2230             {
2231                 if (checkChainCyclicity(pdba, pdbx, cc->r_start[j], cc->r_end[j], rtpFFDB,
2232                                         rtprename, long_bond_dist_, short_bond_dist_))
2233                 {
2234                     cc->cyclicBondsIndex.push_back(cc->r_start[j]);
2235                     cc->cyclicBondsIndex.push_back(cc->r_end[j]);
2236                     cc->r_start[j] = -1;
2237                     cc->r_end[j]   = -1;
2238                 }
2239                 else
2240                 {
2241                     j++;
2242                 }
2243             }
2244         }
2245         cc->nterpairs = j;
2246         if (cc->nterpairs == 0 && cc->cyclicBondsIndex.empty())
2247         {
2248             GMX_LOG(logger.info)
2249                     .asParagraph()
2250                     .appendTextFormatted(
2251                             "Problem with chain definition, or missing terminal residues. "
2252                             "This chain does not appear to contain a recognized chain molecule. "
2253                             "If this is incorrect, you can edit residuetypes.dat to modify the "
2254                             "behavior.");
2255         }
2256
2257         /* Check for disulfides and other special bonds */
2258         ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2259
2260         if (!rtprename.empty())
2261         {
2262             rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2263         }
2264
2265         for (int i = 0; i < cc->nterpairs; i++)
2266         {
2267             /* Set termini.
2268              * We first apply a filter so we only have the
2269              * termini that can be applied to the residue in question
2270              * (or a generic terminus if no-residue specific is available).
2271              */
2272             /* First the N terminus */
2273             if (nNtdb > 0)
2274             {
2275                 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2276                 if (tdblist.empty())
2277                 {
2278                     GMX_LOG(logger.info)
2279                             .asParagraph()
2280                             .appendTextFormatted(
2281                                     "No suitable end (N or 5') terminus found in database - "
2282                                     "assuming this residue "
2283                                     "is already in a terminus-specific form and skipping terminus "
2284                                     "selection.");
2285                     cc->ntdb.push_back(nullptr);
2286                 }
2287                 else
2288                 {
2289                     if (bTerMan_ && !tdblist.empty())
2290                     {
2291                         sprintf(select, "Select start terminus type for %s-%d",
2292                                 *pdba->resinfo[cc->r_start[i]].name, pdba->resinfo[cc->r_start[i]].nr);
2293                         cc->ntdb.push_back(choose_ter(tdblist, select));
2294                     }
2295                     else
2296                     {
2297                         cc->ntdb.push_back(tdblist[0]);
2298                     }
2299
2300                     printf("Start terminus %s-%d: %s\n", *pdba->resinfo[cc->r_start[i]].name,
2301                            pdba->resinfo[cc->r_start[i]].nr, (cc->ntdb[i])->name.c_str());
2302                     tdblist.clear();
2303                 }
2304             }
2305             else
2306             {
2307                 cc->ntdb.push_back(nullptr);
2308             }
2309
2310             /* And the C terminus */
2311             if (nCtdb > 0)
2312             {
2313                 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2314                 if (tdblist.empty())
2315                 {
2316                     GMX_LOG(logger.info)
2317                             .asParagraph()
2318                             .appendTextFormatted(
2319                                     "No suitable end (C or 3') terminus found in database - "
2320                                     "assuming this residue"
2321                                     "is already in a terminus-specific form and skipping terminus "
2322                                     "selection.");
2323                     cc->ctdb.push_back(nullptr);
2324                 }
2325                 else
2326                 {
2327                     if (bTerMan_ && !tdblist.empty())
2328                     {
2329                         sprintf(select, "Select end terminus type for %s-%d",
2330                                 *pdba->resinfo[cc->r_end[i]].name, pdba->resinfo[cc->r_end[i]].nr);
2331                         cc->ctdb.push_back(choose_ter(tdblist, select));
2332                     }
2333                     else
2334                     {
2335                         cc->ctdb.push_back(tdblist[0]);
2336                     }
2337                     printf("End terminus %s-%d: %s\n", *pdba->resinfo[cc->r_end[i]].name,
2338                            pdba->resinfo[cc->r_end[i]].nr, (cc->ctdb[i])->name.c_str());
2339                     tdblist.clear();
2340                 }
2341             }
2342             else
2343             {
2344                 cc->ctdb.push_back(nullptr);
2345             }
2346         }
2347         std::vector<MoleculePatchDatabase> hb_chain;
2348         /* lookup hackblocks and rtp for all residues */
2349         std::vector<PreprocessResidue> restp_chain;
2350         get_hackblocks_rtp(&hb_chain, &restp_chain, rtpFFDB, pdba->nres, pdba->resinfo, cc->nterpairs,
2351                            &symtab, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing_, logger);
2352         /* ideally, now we would not need the rtp itself anymore, but do
2353            everything using the hb and restp arrays. Unfortunately, that
2354            requires some re-thinking of code in gen_vsite.c, which I won't
2355            do now :( AF 26-7-99 */
2356
2357         rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2358
2359         match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2360
2361         if (bSort_)
2362         {
2363             char**    gnames;
2364             t_blocka* block = new_blocka();
2365             snew(gnames, 1);
2366             sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2367             remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2368             if (bIndexSet_)
2369             {
2370                 if (bRemoveH_)
2371                 {
2372                     GMX_LOG(logger.warning)
2373                             .asParagraph()
2374                             .appendTextFormatted(
2375                                     "With the -remh option the generated "
2376                                     "index file (%s) might be useless "
2377                                     "(the index file is generated before hydrogens are added)",
2378                                     indexOutputFile_.c_str());
2379                 }
2380                 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2381             }
2382             for (int i = 0; i < block->nr; i++)
2383             {
2384                 sfree(gnames[i]);
2385             }
2386             sfree(gnames);
2387             done_blocka(block);
2388             sfree(block);
2389         }
2390         else
2391         {
2392             GMX_LOG(logger.warning)
2393                     .asParagraph()
2394                     .appendTextFormatted(
2395                             "Without sorting no check for duplicate atoms can be done");
2396         }
2397
2398         /* Generate Hydrogen atoms (and termini) in the sequence */
2399         GMX_LOG(logger.info)
2400                 .asParagraph()
2401                 .appendTextFormatted(
2402                         "Generating any missing hydrogen atoms and/or adding termini.");
2403         add_h(&pdba, &localAtoms[chain], &x, ah, &symtab, cc->nterpairs, cc->ntdb, cc->ctdb,
2404               cc->r_start, cc->r_end, bAllowMissing_, cc->cyclicBondsIndex);
2405         GMX_LOG(logger.info)
2406                 .asParagraph()
2407                 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2408
2409         /* make up molecule name(s) */
2410
2411         int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2412
2413         std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2414
2415         std::string molname;
2416         std::string suffix;
2417         if (cc->bAllWat)
2418         {
2419             molname = "Water";
2420         }
2421         else
2422         {
2423             this_chainid = cc->chainid;
2424
2425             /* Add the chain id if we have one */
2426             if (this_chainid != ' ')
2427             {
2428                 suffix.append(formatString("_chain_%c", this_chainid));
2429             }
2430
2431             /* Check if there have been previous chains with the same id */
2432             int nid_used = 0;
2433             for (int k = 0; k < chain; k++)
2434             {
2435                 if (cc->chainid == chains[k].chainid)
2436                 {
2437                     nid_used++;
2438                 }
2439             }
2440             /* Add the number for this chain identifier if there are multiple copies */
2441             if (nid_used > 0)
2442             {
2443                 suffix.append(formatString("%d", nid_used + 1));
2444             }
2445
2446             if (suffix.length() > 0)
2447             {
2448                 molname.append(restype);
2449                 molname.append(suffix);
2450             }
2451             else
2452             {
2453                 molname = restype;
2454             }
2455         }
2456         std::string itp_fn = topologyFile_;
2457
2458         std::string posre_fn = includeTopologyFile_;
2459         if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2460         {
2461             bITP_ = true;
2462             printf("Chain time...\n");
2463             // construct the itp file name
2464             itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2465             itp_fn.append("_");
2466             itp_fn.append(molname);
2467             itp_fn.append(".itp");
2468             // now do the same for posre
2469             posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2470             posre_fn.append("_");
2471             posre_fn.append(molname);
2472             posre_fn.append(".itp");
2473             if (posre_fn == itp_fn)
2474             {
2475                 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2476             }
2477             incls_.emplace_back();
2478             incls_.back() = itp_fn;
2479             itp_file_     = gmx_fio_fopen(itp_fn.c_str(), "w");
2480         }
2481         else
2482         {
2483             bITP_ = false;
2484         }
2485
2486         mols_.emplace_back();
2487         if (cc->bAllWat)
2488         {
2489             mols_.back().name = "SOL";
2490             mols_.back().nr   = pdba->nres;
2491         }
2492         else
2493         {
2494             mols_.back().name = molname;
2495             mols_.back().nr   = 1;
2496         }
2497
2498         if (bITP_)
2499         {
2500             print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2501         }
2502
2503         FILE* top_file2;
2504         if (cc->bAllWat)
2505         {
2506             top_file2 = nullptr;
2507         }
2508         else if (bITP_)
2509         {
2510             top_file2 = itp_file_;
2511         }
2512         else
2513         {
2514             top_file2 = top_file;
2515         }
2516
2517         pdb2top(top_file2, posre_fn.c_str(), molname.c_str(), pdba, &x, &atype, &symtab, rtpFFDB,
2518                 restp_chain, hb_chain, bAllowMissing_, bVsites_, bVsiteAromatics_, ffdir_, mHmult_,
2519                 ssbonds, long_bond_dist_, short_bond_dist_, bDeuterate_, bChargeGroups_, bCmap_,
2520                 bRenumRes_, bRTPresname_, cc->cyclicBondsIndex, logger);
2521
2522         if (!cc->bAllWat)
2523         {
2524             write_posres(posre_fn.c_str(), pdba, posre_fc_);
2525         }
2526
2527         if (bITP_)
2528         {
2529             gmx_fio_fclose(itp_file_);
2530         }
2531
2532         /* pdba and natom have been reassigned somewhere so: */
2533         cc->pdba = pdba;
2534         cc->x    = x;
2535     }
2536
2537     if (watermodel_ == nullptr)
2538     {
2539         for (int chain = 0; chain < numChains; chain++)
2540         {
2541             if (chains[chain].bAllWat)
2542             {
2543                 auto message = formatString(
2544                         "You have chosen not to include a water model, "
2545                         "but there is water in the input file. Select a "
2546                         "water model or remove the water from your input file.");
2547                 GMX_THROW(InconsistentInputError(message));
2548             }
2549         }
2550     }
2551     else
2552     {
2553         std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2554         if (!fflib_fexist(waterFile))
2555         {
2556             auto message = formatString(
2557                     "The topology file '%s' for the selected water "
2558                     "model '%s' can not be found in the force field "
2559                     "directory. Select a different water model.",
2560                     waterFile.c_str(), watermodel_);
2561             GMX_THROW(InconsistentInputError(message));
2562         }
2563     }
2564
2565     print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2566     gmx_fio_fclose(top_file);
2567
2568     /* now merge all chains back together */
2569     natom    = 0;
2570     int nres = 0;
2571     for (int i = 0; (i < numChains); i++)
2572     {
2573         natom += chains[i].pdba->nr;
2574         nres += chains[i].pdba->nres;
2575     }
2576     t_atoms* atoms;
2577     snew(atoms, 1);
2578     init_t_atoms(atoms, natom, false);
2579     for (int i = 0; i < atoms->nres; i++)
2580     {
2581         sfree(atoms->resinfo[i].name);
2582     }
2583     atoms->nres = nres;
2584     srenew(atoms->resinfo, nres);
2585     x.clear();
2586     int k = 0;
2587     int l = 0;
2588     for (int i = 0; (i < numChains); i++)
2589     {
2590         if (numChains > 1)
2591         {
2592             GMX_LOG(logger.info)
2593                     .asParagraph()
2594                     .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2595                                          i + 1, chains[i].pdba->nr, chains[i].pdba->nres);
2596         }
2597         for (int j = 0; (j < chains[i].pdba->nr); j++)
2598         {
2599             atoms->atom[k] = chains[i].pdba->atom[j];
2600             atoms->atom[k].resind += l; /* l is processed nr of residues */
2601             atoms->atomname[k]                            = chains[i].pdba->atomname[j];
2602             atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2603             x.push_back(chains[i].x[j]);
2604             k++;
2605         }
2606         for (int j = 0; (j < chains[i].pdba->nres); j++)
2607         {
2608             atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2609             if (bRTPresname_)
2610             {
2611                 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2612             }
2613             l++;
2614         }
2615         done_atom(chains[i].pdba);
2616     }
2617
2618     if (numChains > 1)
2619     {
2620         GMX_LOG(logger.info)
2621                 .asParagraph()
2622                 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2623         print_sums(atoms, true, logger);
2624     }
2625
2626     rvec box_space;
2627     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2628     clear_rvec(box_space);
2629     if (box[0][0] == 0)
2630     {
2631         make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2632     }
2633     write_sto_conf(outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr,
2634                    pbcType, box);
2635
2636     done_symtab(&symtab);
2637     done_atom(&pdba_all);
2638     done_atom(atoms);
2639     for (int chain = 0; chain < numChains; chain++)
2640     {
2641         sfree(sortAtoms[chain]);
2642         sfree(localAtoms[chain]);
2643     }
2644     sfree(sortAtoms);
2645     sfree(localAtoms);
2646     sfree(atoms);
2647     sfree(title);
2648     sfree(pdbx);
2649
2650     GMX_LOG(logger.info)
2651             .asParagraph()
2652             .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2653     GMX_LOG(logger.info)
2654             .asParagraph()
2655             .appendTextFormatted("You have successfully generated a topology from: %s.",
2656                                  inputConfFile_.c_str());
2657     if (watermodel_ != nullptr)
2658     {
2659         GMX_LOG(logger.info)
2660                 .asParagraph()
2661                 .appendTextFormatted("The %s force field and the %s water model are used.", ffname_,
2662                                      watermodel_);
2663         sfree(watermodel_);
2664     }
2665     else
2666     {
2667         GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2668     }
2669     GMX_LOG(logger.info)
2670             .asParagraph()
2671             .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2672
2673     return 0;
2674 }
2675
2676 } // namespace
2677
2678 const char pdb2gmxInfo::name[] = "pdb2gmx";
2679 const char pdb2gmxInfo::shortDescription[] =
2680         "Convert coordinate files to topology and FF-compliant coordinate files";
2681 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2682 {
2683     return std::make_unique<pdb2gmx>();
2684 }
2685
2686 } // namespace gmx