src/gromacs/gmxpreprocess/pdb2gmx.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "pdb2gmx.h"
  41
  42 #include <cctype>
  43 #include <cstdlib>
  44 #include <cstring>
  45 #include <ctime>
  46
  47 #include <algorithm>
  48 #include <memory>
  49 #include <string>
  50 #include <vector>
  51
  52 #include "gromacs/commandline/cmdlineoptionsmodule.h"
  53 #include "gromacs/fileio/confio.h"
  54 #include "gromacs/fileio/filetypes.h"
  55 #include "gromacs/fileio/gmxfio.h"
  56 #include "gromacs/fileio/pdbio.h"
  57 #include "gromacs/gmxlib/conformation_utilities.h"
  58 #include "gromacs/gmxpreprocess/fflibutil.h"
  59 #include "gromacs/gmxpreprocess/genhydro.h"
  60 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
  61 #include "gromacs/gmxpreprocess/grompp_impl.h"
  62 #include "gromacs/gmxpreprocess/h_db.h"
  63 #include "gromacs/gmxpreprocess/hizzie.h"
  64 #include "gromacs/gmxpreprocess/pdb2top.h"
  65 #include "gromacs/gmxpreprocess/pgutil.h"
  66 #include "gromacs/gmxpreprocess/specbond.h"
  67 #include "gromacs/gmxpreprocess/ter_db.h"
  68 #include "gromacs/gmxpreprocess/toputil.h"
  69 #include "gromacs/gmxpreprocess/xlate.h"
  70 #include "gromacs/math/vec.h"
  71 #include "gromacs/options/basicoptions.h"
  72 #include "gromacs/options/filenameoption.h"
  73 #include "gromacs/options/ioptionscontainer.h"
  74 #include "gromacs/topology/atomprop.h"
  75 #include "gromacs/topology/block.h"
  76 #include "gromacs/topology/index.h"
  77 #include "gromacs/topology/residuetypes.h"
  78 #include "gromacs/topology/symtab.h"
  79 #include "gromacs/topology/topology.h"
  80 #include "gromacs/utility/dir_separator.h"
  81 #include "gromacs/utility/exceptions.h"
  82 #include "gromacs/utility/fatalerror.h"
  83 #include "gromacs/utility/filestream.h"
  84 #include "gromacs/utility/loggerbuilder.h"
  85 #include "gromacs/utility/path.h"
  86 #include "gromacs/utility/smalloc.h"
  87 #include "gromacs/utility/strdb.h"
  88 #include "gromacs/utility/stringutil.h"
  89
  90 #include "hackblock.h"
  91 #include "resall.h"
  92
  93 struct RtpRename
  94 {
  95     RtpRename(const char* newGmx, const char* newMain, const char* newNter, const char* newCter, const char* newBter) :
  96         gmx(newGmx),
  97         main(newMain),
  98         nter(newNter),
  99         cter(newCter),
 100         bter(newBter)
 101     {
 102     }
 103     std::string gmx;
 104     std::string main;
 105     std::string nter;
 106     std::string cter;
 107     std::string bter;
 108 };
 109
 110 static const char* res2bb_notermini(const std::string& name, gmx::ArrayRef<const RtpRename> rr)
 111 {
 112     /* NOTE: This function returns the main building block name,
 113      *       it does not take terminal renaming into account.
 114      */
 115     auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto& rename) {
 116         return gmx::equalCaseInsensitive(name, rename.gmx);
 117     });
 118     return found != rr.end() ? found->main.c_str() : name.c_str();
 119 }
 120
 121 static const char* select_res(int                            nr,
 122                               int                            resnr,
 123                               const char*                    name[],
 124                               const char*                    expl[],
 125                               const char*                    title,
 126                               gmx::ArrayRef<const RtpRename> rr)
 127 {
 128     printf("Which %s type do you want for residue %d\n", title, resnr + 1);
 129     for (int sel = 0; (sel < nr); sel++)
 130     {
 131         printf("%d. %s (%s)\n", sel, expl[sel], res2bb_notermini(name[sel], rr));
 132     }
 133     printf("\nType a number:");
 134     fflush(stdout);
 135
 136     int userSelection;
 137     if (scanf("%d", &userSelection) != 1)
 138     {
 139         gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr + 1);
 140     }
 141
 142     return name[userSelection];
 143 }
 144
 145 static const char* get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 146 {
 147     enum
 148     {
 149         easp,
 150         easpH,
 151         easpNR
 152     };
 153     const char* lh[easpNR]   = { "ASP", "ASPH" };
 154     const char* expl[easpNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
 155
 156     return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", rr);
 157 }
 158
 159 static const char* get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 160 {
 161     enum
 162     {
 163         eglu,
 164         egluH,
 165         egluNR
 166     };
 167     const char* lh[egluNR]   = { "GLU", "GLUH" };
 168     const char* expl[egluNR] = { "Not protonated (charge -1)", "Protonated (charge 0)" };
 169
 170     return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", rr);
 171 }
 172
 173 static const char* get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 174 {
 175     enum
 176     {
 177         egln,
 178         eglnH,
 179         eglnNR
 180     };
 181     const char* lh[eglnNR]   = { "GLN", "QLN" };
 182     const char* expl[eglnNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
 183
 184     return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", rr);
 185 }
 186
 187 static const char* get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 188 {
 189     enum
 190     {
 191         elys,
 192         elysH,
 193         elysNR
 194     };
 195     const char* lh[elysNR]   = { "LYSN", "LYS" };
 196     const char* expl[elysNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
 197
 198     return select_res(elysNR, resnr, lh, expl, "LYSINE", rr);
 199 }
 200
 201 static const char* get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 202 {
 203     enum
 204     {
 205         earg,
 206         eargH,
 207         eargNR
 208     };
 209     const char* lh[eargNR]   = { "ARGN", "ARG" };
 210     const char* expl[eargNR] = { "Not protonated (charge 0)", "Protonated (charge +1)" };
 211
 212     return select_res(eargNR, resnr, lh, expl, "ARGININE", rr);
 213 }
 214
 215 static const char* get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
 216 {
 217     const char* expl[ehisNR] = { "H on ND1 only", "H on NE2 only", "H on ND1 and NE2",
 218                                  "Coupled to Heme" };
 219
 220     return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", rr);
 221 }
 222
 223 static void read_rtprename(const char* fname, FILE* fp, std::vector<RtpRename>* rtprename)
 224 {
 225     char line[STRLEN], buf[STRLEN];
 226
 227     int ncol = 0;
 228     while (get_a_line(fp, line, STRLEN))
 229     {
 230         /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
 231          * For other args, we read up to 6 chars (so we can detect if the length is > 5).
 232          * Note that the buffer length has been increased to 7 to allow this,
 233          * so we just need to make sure the strings have zero-length initially.
 234          */
 235         char gmx[STRLEN];
 236         char main[STRLEN];
 237         char nter[STRLEN];
 238         char cter[STRLEN];
 239         char bter[STRLEN];
 240         gmx[0]       = '\0';
 241         main[0]      = '\0';
 242         nter[0]      = '\0';
 243         cter[0]      = '\0';
 244         bter[0]      = '\0';
 245         int       nc = sscanf(line, "%6s %6s %6s %6s %6s %s", gmx, main, nter, cter, bter, buf);
 246         RtpRename newEntry(gmx, main, nter, cter, bter);
 247         if (ncol == 0)
 248         {
 249             if (nc != 2 && nc != 5)
 250             {
 251                 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d or %d",
 252                           fname, ncol, 2, 5);
 253             }
 254             ncol = nc;
 255         }
 256         else if (nc != ncol)
 257         {
 258             gmx_fatal(FARGS,
 259                       "A line in residue renaming database '%s' has %d columns, while previous "
 260                       "lines have %d columns",
 261                       fname, nc, ncol);
 262         }
 263
 264         if (nc == 2)
 265         {
 266             /* This file does not have special termini names, copy them from main */
 267             newEntry.nter = newEntry.main;
 268             newEntry.cter = newEntry.main;
 269             newEntry.bter = newEntry.main;
 270         }
 271         rtprename->push_back(newEntry);
 272     }
 273 }
 274
 275 static std::string search_resrename(gmx::ArrayRef<const RtpRename> rr,
 276                                     const char*                    name,
 277                                     bool                           bStart,
 278                                     bool                           bEnd,
 279                                     bool                           bCompareFFRTPname)
 280 {
 281     auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto& rename) {
 282         return ((!bCompareFFRTPname && (name == rename.gmx))
 283                 || (bCompareFFRTPname && (name == rename.main)));
 284     });
 285
 286     std::string newName;
 287     /* If found in the database, rename this residue's rtp building block,
 288      * otherwise keep the old name.
 289      */
 290     if (found != rr.end())
 291     {
 292         if (bStart && bEnd)
 293         {
 294             newName = found->bter;
 295         }
 296         else if (bStart)
 297         {
 298             newName = found->nter;
 299         }
 300         else if (bEnd)
 301         {
 302             newName = found->cter;
 303         }
 304         else
 305         {
 306             newName = found->main;
 307         }
 308
 309         if (newName[0] == '-')
 310         {
 311             gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name,
 312                       bStart ? (bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus")
 313                              : (bEnd ? " as an ending terminus" : ""));
 314         }
 315     }
 316
 317     return newName;
 318 }
 319
 320 static void rename_resrtp(t_atoms*                       pdba,
 321                           int                            nterpairs,
 322                           gmx::ArrayRef<const int>       r_start,
 323                           gmx::ArrayRef<const int>       r_end,
 324                           gmx::ArrayRef<const RtpRename> rr,
 325                           t_symtab*                      symtab,
 326                           bool                           bVerbose,
 327                           const gmx::MDLogger&           logger)
 328 {
 329     bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
 330
 331     for (int r = 0; r < pdba->nres; r++)
 332     {
 333         bool bStart = false;
 334         bool bEnd   = false;
 335         for (int j = 0; j < nterpairs; j++)
 336         {
 337             if (r == r_start[j])
 338             {
 339                 bStart = true;
 340             }
 341         }
 342         for (int j = 0; j < nterpairs; j++)
 343         {
 344             if (r == r_end[j])
 345             {
 346                 bEnd = true;
 347             }
 348         }
 349
 350         std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
 351
 352         if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
 353         {
 354             /* This is a terminal residue, but the residue name,
 355              * currently stored in .rtp, is not a standard residue name,
 356              * but probably a force field specific rtp name.
 357              * Check if we need to rename it because it is terminal.
 358              */
 359             newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, true);
 360         }
 361
 362         if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
 363         {
 364             if (bVerbose)
 365             {
 366                 GMX_LOG(logger.info)
 367                         .asParagraph()
 368                         .appendTextFormatted("Changing rtp entry of residue %d %s to '%s'",
 369                                              pdba->resinfo[r].nr, *pdba->resinfo[r].name,
 370                                              newName.c_str());
 371             }
 372             pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
 373         }
 374     }
 375 }
 376
 377 static void pdbres_to_gmxrtp(t_atoms* pdba)
 378 {
 379     int i;
 380
 381     for (i = 0; (i < pdba->nres); i++)
 382     {
 383         if (pdba->resinfo[i].rtp == nullptr)
 384         {
 385             pdba->resinfo[i].rtp = pdba->resinfo[i].name;
 386         }
 387     }
 388 }
 389
 390 static void rename_pdbres(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
 391 {
 392     char* resnm;
 393     int   i;
 394
 395     for (i = 0; (i < pdba->nres); i++)
 396     {
 397         resnm = *pdba->resinfo[i].name;
 398         if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm)))
 399             || (!bFullCompare && strstr(resnm, oldnm) != nullptr))
 400         {
 401             /* Rename the residue name (not the rtp name) */
 402             pdba->resinfo[i].name = put_symtab(symtab, newnm);
 403         }
 404     }
 405 }
 406
 407 static void rename_bb(t_atoms* pdba, const char* oldnm, const char* newnm, bool bFullCompare, t_symtab* symtab)
 408 {
 409     char* bbnm;
 410     int   i;
 411
 412     for (i = 0; (i < pdba->nres); i++)
 413     {
 414         /* We have not set the rtp name yes, use the residue name */
 415         bbnm = *pdba->resinfo[i].name;
 416         if ((bFullCompare && (gmx::equalCaseInsensitive(bbnm, oldnm)))
 417             || (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
 418         {
 419             /* Change the rtp builing block name */
 420             pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
 421         }
 422     }
 423 }
 424
 425 static void rename_bbint(t_atoms*                       pdba,
 426                          const char*                    oldnm,
 427                          const char*                    gettp(int, gmx::ArrayRef<const RtpRename>),
 428                          bool                           bFullCompare,
 429                          t_symtab*                      symtab,
 430                          gmx::ArrayRef<const RtpRename> rr)
 431 {
 432     int         i;
 433     const char* ptr;
 434     char*       bbnm;
 435
 436     for (i = 0; i < pdba->nres; i++)
 437     {
 438         /* We have not set the rtp name yet, use the residue name */
 439         bbnm = *pdba->resinfo[i].name;
 440         if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) || (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
 441         {
 442             ptr                  = gettp(i, rr);
 443             pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
 444         }
 445     }
 446 }
 447
 448 static void check_occupancy(t_atoms* atoms, const char* filename, bool bVerbose, const gmx::MDLogger& logger)
 449 {
 450     int i, ftp;
 451     int nzero   = 0;
 452     int nnotone = 0;
 453
 454     ftp = fn2ftp(filename);
 455     if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
 456     {
 457         GMX_LOG(logger.warning).asParagraph().appendTextFormatted("No occupancies in %s", filename);
 458     }
 459     else
 460     {
 461         for (i = 0; (i < atoms->nr); i++)
 462         {
 463             if (atoms->pdbinfo[i].occup != 1)
 464             {
 465                 if (bVerbose)
 466                 {
 467                     GMX_LOG(logger.warning)
 468                             .asParagraph()
 469                             .appendTextFormatted("Occupancy for atom %s%d-%s is %f rather than 1",
 470                                                  *atoms->resinfo[atoms->atom[i].resind].name,
 471                                                  atoms->resinfo[atoms->atom[i].resind].nr,
 472                                                  *atoms->atomname[i], atoms->pdbinfo[i].occup);
 473                 }
 474                 if (atoms->pdbinfo[i].occup == 0)
 475                 {
 476                     nzero++;
 477                 }
 478                 else
 479                 {
 480                     nnotone++;
 481                 }
 482             }
 483         }
 484         if (nzero == atoms->nr)
 485         {
 486             GMX_LOG(logger.warning)
 487                     .asParagraph()
 488                     .appendTextFormatted(
 489                             "All occupancy fields zero. This is probably not an X-Ray structure");
 490         }
 491         else if ((nzero > 0) || (nnotone > 0))
 492         {
 493             GMX_LOG(logger.warning)
 494                     .asParagraph()
 495                     .appendTextFormatted(
 496                             "there were %d atoms with zero occupancy and %d atoms with "
 497                             "         occupancy unequal to one (out of %d atoms). Check your pdb "
 498                             "file.",
 499                             nzero, nnotone, atoms->nr);
 500         }
 501         else
 502         {
 503             GMX_LOG(logger.warning).asParagraph().appendTextFormatted("All occupancies are one");
 504         }
 505     }
 506 }
 507
 508 static void write_posres(const char* fn, t_atoms* pdba, real fc)
 509 {
 510     FILE* fp;
 511     int   i;
 512
 513     fp = gmx_fio_fopen(fn, "w");
 514     fprintf(fp,
 515             "; In this topology include file, you will find position restraint\n"
 516             "; entries for all the heavy atoms in your original pdb file.\n"
 517             "; This means that all the protons which were added by pdb2gmx are\n"
 518             "; not restrained.\n"
 519             "\n"
 520             "[ position_restraints ]\n"
 521             "; %4s%6s%8s%8s%8s\n",
 522             "atom", "type", "fx", "fy", "fz");
 523     for (i = 0; (i < pdba->nr); i++)
 524     {
 525         if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
 526         {
 527             fprintf(fp, "%6d%6d  %g  %g  %g\n", i + 1, 1, fc, fc, fc);
 528         }
 529     }
 530     gmx_fio_fclose(fp);
 531 }
 532
 533 static int read_pdball(const char*     inf,
 534                        bool            bOutput,
 535                        const char*     outf,
 536                        char**          title,
 537                        t_atoms*        atoms,
 538                        rvec**          x,
 539                        PbcType*        pbcType,
 540                        matrix          box,
 541                        bool            bRemoveH,
 542                        t_symtab*       symtab,
 543                        ResidueType*    rt,
 544                        const char*     watres,
 545                        AtomProperties* aps,
 546                        bool            bVerbose)
 547 /* Read a pdb file. (containing proteins) */
 548 {
 549     int natom, new_natom, i;
 550
 551     /* READ IT */
 552     printf("Reading %s...\n", inf);
 553     readConfAndAtoms(inf, symtab, title, atoms, pbcType, x, nullptr, box);
 554     natom = atoms->nr;
 555     if (atoms->pdbinfo == nullptr)
 556     {
 557         snew(atoms->pdbinfo, atoms->nr);
 558     }
 559     if (fn2ftp(inf) == efPDB)
 560     {
 561         get_pdb_atomnumber(atoms, aps);
 562     }
 563     if (bRemoveH)
 564     {
 565         new_natom = 0;
 566         for (i = 0; i < atoms->nr; i++)
 567         {
 568             if (!is_hydrogen(*atoms->atomname[i]))
 569             {
 570                 atoms->atom[new_natom]     = atoms->atom[i];
 571                 atoms->atomname[new_natom] = atoms->atomname[i];
 572                 atoms->pdbinfo[new_natom]  = atoms->pdbinfo[i];
 573                 copy_rvec((*x)[i], (*x)[new_natom]);
 574                 new_natom++;
 575             }
 576         }
 577         atoms->nr = new_natom;
 578         natom     = new_natom;
 579     }
 580
 581     printf("Read");
 582     if (title[0])
 583     {
 584         printf(" '%s',", *title);
 585     }
 586     printf(" %d atoms\n", natom);
 587
 588     /* Rename residues */
 589     rename_pdbres(atoms, "HOH", watres, false, symtab);
 590     rename_pdbres(atoms, "SOL", watres, false, symtab);
 591     rename_pdbres(atoms, "WAT", watres, false, symtab);
 592
 593     rename_atoms("xlateat.dat", nullptr, atoms, symtab, {}, true, rt, true, bVerbose);
 594
 595     if (natom == 0)
 596     {
 597         return 0;
 598     }
 599     if (bOutput)
 600     {
 601         write_sto_conf(outf, *title, atoms, *x, nullptr, *pbcType, box);
 602     }
 603
 604     return natom;
 605 }
 606
 607 static void process_chain(t_atoms*                       pdba,
 608                           gmx::ArrayRef<gmx::RVec>       x,
 609                           bool                           bTrpU,
 610                           bool                           bPheU,
 611                           bool                           bTyrU,
 612                           bool                           bLysMan,
 613                           bool                           bAspMan,
 614                           bool                           bGluMan,
 615                           bool                           bHisMan,
 616                           bool                           bArgMan,
 617                           bool                           bGlnMan,
 618                           real                           angle,
 619                           real                           distance,
 620                           t_symtab*                      symtab,
 621                           gmx::ArrayRef<const RtpRename> rr)
 622 {
 623     /* Rename aromatics, lys, asp and histidine */
 624     if (bTyrU)
 625     {
 626         rename_bb(pdba, "TYR", "TYRU", false, symtab);
 627     }
 628     if (bTrpU)
 629     {
 630         rename_bb(pdba, "TRP", "TRPU", false, symtab);
 631     }
 632     if (bPheU)
 633     {
 634         rename_bb(pdba, "PHE", "PHEU", false, symtab);
 635     }
 636     if (bLysMan)
 637     {
 638         rename_bbint(pdba, "LYS", get_lystp, false, symtab, rr);
 639     }
 640     if (bArgMan)
 641     {
 642         rename_bbint(pdba, "ARG", get_argtp, false, symtab, rr);
 643     }
 644     if (bGlnMan)
 645     {
 646         rename_bbint(pdba, "GLN", get_glntp, false, symtab, rr);
 647     }
 648     if (bAspMan)
 649     {
 650         rename_bbint(pdba, "ASP", get_asptp, false, symtab, rr);
 651     }
 652     else
 653     {
 654         rename_bb(pdba, "ASPH", "ASP", false, symtab);
 655     }
 656     if (bGluMan)
 657     {
 658         rename_bbint(pdba, "GLU", get_glutp, false, symtab, rr);
 659     }
 660     else
 661     {
 662         rename_bb(pdba, "GLUH", "GLU", false, symtab);
 663     }
 664
 665     if (!bHisMan)
 666     {
 667         set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
 668     }
 669     else
 670     {
 671         rename_bbint(pdba, "HIS", get_histp, true, symtab, rr);
 672     }
 673
 674     /* Initialize the rtp builing block names with the residue names
 675      * for the residues that have not been processed above.
 676      */
 677     pdbres_to_gmxrtp(pdba);
 678
 679     /* Now we have all rtp names set.
 680      * The rtp names will conform to Gromacs naming,
 681      * unless the input pdb file contained one or more force field specific
 682      * rtp names as residue names.
 683      */
 684 }
 685
 686 /* struct for sorting the atoms from the pdb file */
 687 typedef struct
 688 {
 689     int  resnr;  /* residue number               */
 690     int  j;      /* database order index         */
 691     int  index;  /* original atom number         */
 692     char anm1;   /* second letter of atom name   */
 693     char altloc; /* alternate location indicator */
 694 } t_pdbindex;
 695
 696 static bool pdbicomp(const t_pdbindex& a, const t_pdbindex& b)
 697 {
 698     int d = (a.resnr - b.resnr);
 699     if (d == 0)
 700     {
 701         d = (a.j - b.j);
 702         if (d == 0)
 703         {
 704             d = (a.anm1 - b.anm1);
 705             if (d == 0)
 706             {
 707                 d = (a.altloc - b.altloc);
 708             }
 709         }
 710     }
 711     return d < 0;
 712 }
 713
 714 static void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
 715                           int                                    natoms,
 716                           t_atoms**                              pdbaptr,
 717                           t_atoms**                              newPdbAtoms,
 718                           std::vector<gmx::RVec>*                x,
 719                           t_blocka*                              block,
 720                           char***                                gnames)
 721 {
 722     t_atoms*               pdba = *pdbaptr;
 723     std::vector<gmx::RVec> xnew;
 724     t_pdbindex*            pdbi;
 725     char*                  atomnm;
 726
 727     natoms = pdba->nr;
 728     snew(pdbi, natoms);
 729
 730     for (int i = 0; i < natoms; i++)
 731     {
 732         atomnm                                  = *pdba->atomname[i];
 733         const PreprocessResidue* localPpResidue = &restp_chain[pdba->atom[i].resind];
 734         auto                     found =
 735                 std::find_if(localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
 736                              [&atomnm](char** it) { return gmx::equalCaseInsensitive(atomnm, *it); });
 737         if (found == localPpResidue->atomname.end())
 738         {
 739             std::string buf = gmx::formatString(
 740                     "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
 741                     "while sorting atoms.\n%s",
 742                     atomnm, *pdba->resinfo[pdba->atom[i].resind].name,
 743                     pdba->resinfo[pdba->atom[i].resind].nr, localPpResidue->resname.c_str(),
 744                     localPpResidue->natom(),
 745                     is_hydrogen(atomnm)
 746                             ? "\nFor a hydrogen, this can be a different protonation state, or it\n"
 747                               "might have had a different number in the PDB file and was rebuilt\n"
 748                               "(it might for instance have been H3, and we only expected H1 & "
 749                               "H2).\n"
 750                               "Note that hydrogens might have been added to the entry for the "
 751                               "N-terminus.\n"
 752                               "Remove this hydrogen or choose a different protonation state to "
 753                               "solve it.\n"
 754                               "Option -ignh will ignore all hydrogens in the input."
 755                             : ".");
 756             gmx_fatal(FARGS, "%s", buf.c_str());
 757         }
 758         /* make shadow array to be sorted into indexgroup */
 759         pdbi[i].resnr  = pdba->atom[i].resind;
 760         pdbi[i].j      = std::distance(localPpResidue->atomname.begin(), found);
 761         pdbi[i].index  = i;
 762         pdbi[i].anm1   = atomnm[1];
 763         pdbi[i].altloc = pdba->pdbinfo[i].altloc;
 764     }
 765     std::sort(pdbi, pdbi + natoms, pdbicomp);
 766
 767     /* pdba is sorted in pdbnew using the pdbi index */
 768     std::vector<int> a(natoms);
 769     srenew(*newPdbAtoms, 1);
 770     init_t_atoms((*newPdbAtoms), natoms, true);
 771     (*newPdbAtoms)->nr   = pdba->nr;
 772     (*newPdbAtoms)->nres = pdba->nres;
 773     srenew((*newPdbAtoms)->resinfo, pdba->nres);
 774     std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
 775     for (int i = 0; i < natoms; i++)
 776     {
 777         (*newPdbAtoms)->atom[i]     = pdba->atom[pdbi[i].index];
 778         (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
 779         (*newPdbAtoms)->pdbinfo[i]  = pdba->pdbinfo[pdbi[i].index];
 780         xnew.push_back(x->at(pdbi[i].index));
 781         /* make indexgroup in block */
 782         a[i] = pdbi[i].index;
 783     }
 784     /* clean up */
 785     done_atom(pdba);
 786     sfree(pdba);
 787     /* copy the sorted pdbnew back to pdba */
 788     *pdbaptr = *newPdbAtoms;
 789     *x       = xnew;
 790     add_grp(block, gnames, a, "prot_sort");
 791     sfree(pdbi);
 792 }
 793
 794 static int remove_duplicate_atoms(t_atoms* pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose, const gmx::MDLogger& logger)
 795 {
 796     int        i, j, oldnatoms, ndel;
 797     t_resinfo* ri;
 798
 799     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Checking for duplicate atoms....");
 800     oldnatoms = pdba->nr;
 801     ndel      = 0;
 802     /* NOTE: pdba->nr is modified inside the loop */
 803     for (i = 1; (i < pdba->nr); i++)
 804     {
 805         /* compare 'i' and 'i-1', throw away 'i' if they are identical
 806            this is a 'while' because multiple alternate locations can be present */
 807         while ((i < pdba->nr) && (pdba->atom[i - 1].resind == pdba->atom[i].resind)
 808                && (strcmp(*pdba->atomname[i - 1], *pdba->atomname[i]) == 0))
 809         {
 810             ndel++;
 811             if (bVerbose)
 812             {
 813                 ri = &pdba->resinfo[pdba->atom[i].resind];
 814                 GMX_LOG(logger.info)
 815                         .asParagraph()
 816                         .appendTextFormatted("deleting duplicate atom %4s  %s%4d%c",
 817                                              *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
 818                 if (ri->chainid && (ri->chainid != ' '))
 819                 {
 820                     printf(" ch %c", ri->chainid);
 821                 }
 822                 if (pdba->pdbinfo)
 823                 {
 824                     if (pdba->pdbinfo[i].atomnr)
 825                     {
 826                         printf("  pdb nr %4d", pdba->pdbinfo[i].atomnr);
 827                     }
 828                     if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
 829                     {
 830                         printf("  altloc %c", pdba->pdbinfo[i].altloc);
 831                     }
 832                 }
 833                 printf("\n");
 834             }
 835             pdba->nr--;
 836             /* We can not free, since it might be in the symtab */
 837             /* sfree(pdba->atomname[i]); */
 838             for (j = i; j < pdba->nr; j++)
 839             {
 840                 pdba->atom[j]     = pdba->atom[j + 1];
 841                 pdba->atomname[j] = pdba->atomname[j + 1];
 842                 if (pdba->pdbinfo)
 843                 {
 844                     pdba->pdbinfo[j] = pdba->pdbinfo[j + 1];
 845                 }
 846                 copy_rvec(x[j + 1], x[j]);
 847             }
 848             srenew(pdba->atom, pdba->nr);
 849             /* srenew(pdba->atomname, pdba->nr); */
 850             srenew(pdba->pdbinfo, pdba->nr);
 851         }
 852     }
 853     if (pdba->nr != oldnatoms)
 854     {
 855         GMX_LOG(logger.info)
 856                 .asParagraph()
 857                 .appendTextFormatted("Now there are %d atoms. Deleted %d duplicates.", pdba->nr, ndel);
 858     }
 859
 860     return pdba->nr;
 861 }
 862
 863 static void checkResidueTypeSanity(t_atoms* pdba, int r0, int r1, ResidueType* rt)
 864 {
 865     std::string startResidueString =
 866             gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
 867     std::string endResidueString =
 868             gmx::formatString("%s%d", *pdba->resinfo[r1 - 1].name, pdba->resinfo[r1 - 1].nr);
 869
 870     // Check whether all residues in chain have the same chain ID.
 871     bool        allResiduesHaveSameChainID = true;
 872     char        chainID0                   = pdba->resinfo[r0].chainid;
 873     char        chainID;
 874     std::string residueString;
 875
 876     for (int i = r0 + 1; i < r1; i++)
 877     {
 878         chainID = pdba->resinfo[i].chainid;
 879         if (chainID != chainID0)
 880         {
 881             allResiduesHaveSameChainID = false;
 882             residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 883             break;
 884         }
 885     }
 886
 887     if (!allResiduesHaveSameChainID)
 888     {
 889         gmx_fatal(FARGS,
 890                   "The chain covering the range %s--%s does not have a consistent chain ID. "
 891                   "The first residue has ID '%c', while residue %s has ID '%c'.",
 892                   startResidueString.c_str(), endResidueString.c_str(), chainID0,
 893                   residueString.c_str(), chainID);
 894     }
 895
 896     // At this point all residues have the same ID. If they are also non-blank
 897     // we can be a bit more aggressive and require the types match too.
 898     if (chainID0 != ' ')
 899     {
 900         bool        allResiduesHaveSameType = true;
 901         std::string restype;
 902         std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
 903
 904         for (int i = r0 + 1; i < r1; i++)
 905         {
 906             restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
 907             if (!gmx::equalCaseInsensitive(restype, restype0))
 908             {
 909                 allResiduesHaveSameType = false;
 910                 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 911                 break;
 912             }
 913         }
 914
 915         if (!allResiduesHaveSameType)
 916         {
 917             gmx_fatal(FARGS,
 918                       "The residues in the chain %s--%s do not have a consistent type. "
 919                       "The first residue has type '%s', while residue %s is of type '%s'. "
 920                       "Either there is a mistake in your chain, or it includes nonstandard "
 921                       "residue names that have not yet been added to the residuetypes.dat "
 922                       "file in the GROMACS library directory. If there are other molecules "
 923                       "such as ligands, they should not have the same chain ID as the "
 924                       "adjacent protein chain since it's a separate molecule.",
 925                       startResidueString.c_str(), endResidueString.c_str(), restype0.c_str(),
 926                       residueString.c_str(), restype.c_str());
 927         }
 928     }
 929 }
 930
 931 static void find_nc_ter(t_atoms* pdba, int r0, int r1, int* r_start, int* r_end, ResidueType* rt, const gmx::MDLogger& logger)
 932 {
 933     int                                i;
 934     gmx::compat::optional<std::string> startrestype;
 935
 936     *r_start = -1;
 937     *r_end   = -1;
 938
 939     int startWarnings = 0;
 940     int endWarnings   = 0;
 941     int ionNotes      = 0;
 942
 943     // Check that all residues have the same chain identifier, and if it is
 944     // non-blank we also require the residue types to match.
 945     checkResidueTypeSanity(pdba, r0, r1, rt);
 946
 947     // If we return correctly from checkResidueTypeSanity(), the only
 948     // remaining cases where we can have non-matching residue types is if
 949     // the chain ID was blank, which could be the case e.g. for a structure
 950     // read from a GRO file or other file types without chain information.
 951     // In that case we need to be a bit more liberal and detect chains based
 952     // on the residue type.
 953
 954     // If we get here, the chain ID must be identical for all residues
 955     char chainID = pdba->resinfo[r0].chainid;
 956
 957     /* Find the starting terminus (typially N or 5') */
 958     for (i = r0; i < r1 && *r_start == -1; i++)
 959     {
 960         startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
 961         if (!startrestype)
 962         {
 963             continue;
 964         }
 965         if (gmx::equalCaseInsensitive(*startrestype, "Protein")
 966             || gmx::equalCaseInsensitive(*startrestype, "DNA")
 967             || gmx::equalCaseInsensitive(*startrestype, "RNA"))
 968         {
 969             GMX_LOG(logger.info)
 970                     .asParagraph()
 971                     .appendTextFormatted("Identified residue %s%d as a starting terminus.",
 972                                          *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 973             *r_start = i;
 974         }
 975         else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
 976         {
 977             if (ionNotes < 5)
 978             {
 979                 GMX_LOG(logger.info)
 980                         .asParagraph()
 981                         .appendTextFormatted(
 982                                 "Residue %s%d has type 'Ion', assuming it is not linked into a "
 983                                 "chain.",
 984                                 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
 985             }
 986             if (ionNotes == 4)
 987             {
 988                 GMX_LOG(logger.info)
 989                         .asParagraph()
 990                         .appendTextFormatted("Disabling further notes about ions.");
 991             }
 992             ionNotes++;
 993         }
 994         else
 995         {
 996             // Either no known residue type, or one not needing special handling
 997             if (startWarnings < 5)
 998             {
 999                 if (chainID == ' ')
1000                 {
1001                     GMX_LOG(logger.warning)
1002                             .asParagraph()
1003                             .appendTextFormatted(
1004                                     "Starting residue %s%d in chain not identified as "
1005                                     "Protein/RNA/DNA. "
1006                                     "This chain lacks identifiers, which makes it impossible to do "
1007                                     "strict "
1008                                     "classification of the start/end residues. Here we need to "
1009                                     "guess this residue "
1010                                     "should not be part of the chain and instead introduce a "
1011                                     "break, but that will "
1012                                     "be catastrophic if they should in fact be linked. Please "
1013                                     "check your structure, "
1014                                     "and add %s to residuetypes.dat if this was not correct.",
1015                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr, *pdba->resinfo[i].name);
1016                 }
1017                 else
1018                 {
1019                     GMX_LOG(logger.warning)
1020                             .asParagraph()
1021                             .appendTextFormatted(
1022                                     "No residues in chain starting at %s%d identified as "
1023                                     "Protein/RNA/DNA. "
1024                                     "This makes it impossible to link them into a molecule, which "
1025                                     "could either be "
1026                                     "correct or a catastrophic error. Please check your structure, "
1027                                     "and add all "
1028                                     "necessary residue names to residuetypes.dat if this was not "
1029                                     "correct.",
1030                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1031                 }
1032             }
1033             if (startWarnings == 4)
1034             {
1035                 GMX_LOG(logger.warning)
1036                         .asParagraph()
1037                         .appendTextFormatted(
1038                                 "Disabling further warnings about unidentified residues at start "
1039                                 "of chain.");
1040             }
1041             startWarnings++;
1042         }
1043     }
1044
1045     if (*r_start >= 0)
1046     {
1047         /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
1048         for (int i = *r_start; i < r1; i++)
1049         {
1050             gmx::compat::optional<std::string> restype =
1051                     rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
1052             if (!restype)
1053             {
1054                 continue;
1055             }
1056             if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
1057             {
1058                 *r_end = i;
1059             }
1060             else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
1061             {
1062                 if (ionNotes < 5)
1063                 {
1064                     GMX_LOG(logger.info)
1065                             .asParagraph()
1066                             .appendTextFormatted(
1067                                     "Residue %s%d has type 'Ion', assuming it is not linked into a "
1068                                     "chain.",
1069                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr);
1070                 }
1071                 if (ionNotes == 4)
1072                 {
1073                     GMX_LOG(logger.info)
1074                             .asParagraph()
1075                             .appendTextFormatted("Disabling further notes about ions.");
1076                 }
1077                 ionNotes++;
1078             }
1079             else
1080             {
1081                 // Either no known residue type, or one not needing special handling.
1082                 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1083                 // Otherwise the call to checkResidueTypeSanity() will
1084                 // have caught the problem.
1085                 if (endWarnings < 5)
1086                 {
1087                     GMX_LOG(logger.warning)
1088                             .asParagraph()
1089                             .appendTextFormatted(
1090                                     "Residue %s%d in chain has different type ('%s') from "
1091                                     "residue %s%d ('%s'). This chain lacks identifiers, which "
1092                                     "makes "
1093                                     "it impossible to do strict classification of the start/end "
1094                                     "residues. Here we "
1095                                     "need to guess this residue should not be part of the chain "
1096                                     "and instead "
1097                                     "introduce a break, but that will be catastrophic if they "
1098                                     "should in fact be "
1099                                     "linked. Please check your structure, and add %s to "
1100                                     "residuetypes.dat "
1101                                     "if this was not correct.",
1102                                     *pdba->resinfo[i].name, pdba->resinfo[i].nr, restype->c_str(),
1103                                     *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr,
1104                                     startrestype->c_str(), *pdba->resinfo[i].name);
1105                 }
1106                 if (endWarnings == 4)
1107                 {
1108                     GMX_LOG(logger.warning)
1109                             .asParagraph()
1110                             .appendTextFormatted(
1111                                     "Disabling further warnings about unidentified residues at end "
1112                                     "of chain.");
1113                 }
1114                 endWarnings++;
1115             }
1116         }
1117     }
1118
1119     if (*r_end >= 0)
1120     {
1121         GMX_LOG(logger.info)
1122                 .asParagraph()
1123                 .appendTextFormatted("Identified residue %s%d as a ending terminus.",
1124                                      *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
1125     }
1126 }
1127
1128 enum class ChainSeparationType : int
1129 {
1130     IdOrTer,
1131     IdAndTer,
1132     Ter,
1133     Id,
1134     Interactive,
1135     Count
1136 };
1137 static const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNames = {
1138     { "id_or_ter", "id_and_ter", "ter", "id", "interactive" }
1139 };
1140 static const gmx::EnumerationArray<ChainSeparationType, const char*> c_chainSeparationTypeNotificationMessages = {
1141     { "Splitting chemical chains based on TER records or chain id changing.\n",
1142       "Splitting chemical chains based on TER records and chain id changing.\n",
1143       "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1144       "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1145       "Splitting chemical chains interactively.\n" }
1146 };
1147
1148 static void modify_chain_numbers(t_atoms* pdba, ChainSeparationType chainSeparation, const gmx::MDLogger& logger)
1149 {
1150     int         i;
1151     char        old_prev_chainid;
1152     char        old_this_chainid;
1153     int         old_prev_chainnum;
1154     int         old_this_chainnum;
1155     t_resinfo*  ri;
1156     char        select[STRLEN];
1157     int         new_chainnum;
1158     int         this_atomnum;
1159     int         prev_atomnum;
1160     const char* prev_atomname;
1161     const char* this_atomname;
1162     const char* prev_resname;
1163     const char* this_resname;
1164     int         prev_resnum;
1165     int         this_resnum;
1166     char        prev_chainid;
1167     char        this_chainid;
1168
1169     /* The default chain enumeration is based on TER records only */
1170     printf("%s", c_chainSeparationTypeNotificationMessages[chainSeparation]);
1171
1172     old_prev_chainid  = '?';
1173     old_prev_chainnum = -1;
1174     new_chainnum      = -1;
1175
1176     this_atomname = nullptr;
1177     this_atomnum  = -1;
1178     this_resname  = nullptr;
1179     this_resnum   = -1;
1180     this_chainid  = '?';
1181
1182     for (i = 0; i < pdba->nres; i++)
1183     {
1184         ri                = &pdba->resinfo[i];
1185         old_this_chainid  = ri->chainid;
1186         old_this_chainnum = ri->chainnum;
1187
1188         prev_atomname = this_atomname;
1189         prev_atomnum  = this_atomnum;
1190         prev_resname  = this_resname;
1191         prev_resnum   = this_resnum;
1192         prev_chainid  = this_chainid;
1193
1194         this_atomname = *(pdba->atomname[i]);
1195         this_atomnum  = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i + 1;
1196         this_resname  = *ri->name;
1197         this_resnum   = ri->nr;
1198         this_chainid  = ri->chainid;
1199
1200         switch (chainSeparation)
1201         {
1202             case ChainSeparationType::IdOrTer:
1203                 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1204                 {
1205                     new_chainnum++;
1206                 }
1207                 break;
1208
1209             case ChainSeparationType::IdAndTer:
1210                 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1211                 {
1212                     new_chainnum++;
1213                 }
1214                 break;
1215
1216             case ChainSeparationType::Id:
1217                 if (old_this_chainid != old_prev_chainid)
1218                 {
1219                     new_chainnum++;
1220                 }
1221                 break;
1222
1223             case ChainSeparationType::Ter:
1224                 if (old_this_chainnum != old_prev_chainnum)
1225                 {
1226                     new_chainnum++;
1227                 }
1228                 break;
1229             case ChainSeparationType::Interactive:
1230                 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1231                 {
1232                     if (i > 0)
1233                     {
1234                         GMX_LOG(logger.info)
1235                                 .asParagraph()
1236                                 .appendTextFormatted(
1237                                         "Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1238 "
1239                                         "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]",
1240                                         prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1241                                         prev_atomname, this_resname, this_resnum, this_chainid,
1242                                         this_atomnum, this_atomname);
1243
1244                         if (nullptr == fgets(select, STRLEN - 1, stdin))
1245                         {
1246                             gmx_fatal(FARGS, "Error reading from stdin");
1247                         }
1248                     }
1249                     if (i == 0 || select[0] == 'y')
1250                     {
1251                         new_chainnum++;
1252                     }
1253                 }
1254                 break;
1255             case ChainSeparationType::Count:
1256                 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1257         }
1258         old_prev_chainid  = old_this_chainid;
1259         old_prev_chainnum = old_this_chainnum;
1260
1261         ri->chainnum = new_chainnum;
1262     }
1263 }
1264
1265 struct t_pdbchain
1266 {
1267     char             chainid   = ' ';
1268     char             chainnum  = ' ';
1269     int              start     = -1;
1270     int              natom     = -1;
1271     bool             bAllWat   = false;
1272     int              nterpairs = -1;
1273     std::vector<int> chainstart;
1274 };
1275
1276 struct t_chain
1277 {
1278     char                                chainid   = ' ';
1279     int                                 chainnum  = ' ';
1280     bool                                bAllWat   = false;
1281     int                                 nterpairs = -1;
1282     std::vector<int>                    chainstart;
1283     std::vector<MoleculePatchDatabase*> ntdb;
1284     std::vector<MoleculePatchDatabase*> ctdb;
1285     std::vector<int>                    r_start;
1286     std::vector<int>                    r_end;
1287     t_atoms*                            pdba;
1288     std::vector<gmx::RVec>              x;
1289 };
1290
1291 enum class VSitesType : int
1292 {
1293     None,
1294     Hydrogens,
1295     Aromatics,
1296     Count
1297 };
1298 static const gmx::EnumerationArray<VSitesType, const char*> c_vsitesTypeNames = {
1299     { "none", "hydrogens", "aromatics" }
1300 };
1301
1302 enum class WaterType : int
1303 {
1304     Select,
1305     None,
1306     Spc,
1307     SpcE,
1308     Tip3p,
1309     Tip4p,
1310     Tip5p,
1311     Tips3p,
1312     Count
1313 };
1314 static const gmx::EnumerationArray<WaterType, const char*> c_waterTypeNames = {
1315     { "select", "none", "spc", "spce", "tip3p", "tip4p", "tip5p", "tips3p" }
1316 };
1317
1318 enum class MergeType : int
1319 {
1320     No,
1321     All,
1322     Interactive,
1323     Count
1324 };
1325 static const gmx::EnumerationArray<MergeType, const char*> c_mergeTypeNames = { { "no", "all",
1326                                                                                   "interactive" } };
1327
1328 namespace gmx
1329 {
1330
1331 namespace
1332 {
1333
1334 class pdb2gmx : public ICommandLineOptionsModule
1335 {
1336 public:
1337     pdb2gmx() :
1338         bVsites_(FALSE),
1339         bPrevWat_(FALSE),
1340         bVsiteAromatics_(FALSE),
1341         chainSeparation_(ChainSeparationType::IdOrTer),
1342         vsitesType_(VSitesType::None),
1343         waterType_(WaterType::Select),
1344         mergeType_(MergeType::No),
1345         itp_file_(nullptr),
1346         mHmult_(0)
1347     {
1348     }
1349
1350     // From ICommandLineOptionsModule
1351     void init(CommandLineModuleSettings* /*settings*/) override {}
1352
1353     void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
1354
1355     void optionsFinished() override;
1356
1357     int run() override;
1358
1359 private:
1360     bool bNewRTP_;
1361     bool bInter_;
1362     bool bCysMan_;
1363     bool bLysMan_;
1364     bool bAspMan_;
1365     bool bGluMan_;
1366     bool bHisMan_;
1367     bool bGlnMan_;
1368     bool bArgMan_;
1369     bool bTerMan_;
1370     bool bUnA_;
1371     bool bHeavyH_;
1372     bool bSort_;
1373     bool bAllowMissing_;
1374     bool bRemoveH_;
1375     bool bDeuterate_;
1376     bool bVerbose_;
1377     bool bChargeGroups_;
1378     bool bCmap_;
1379     bool bRenumRes_;
1380     bool bRTPresname_;
1381     bool bIndexSet_;
1382     bool bOutputSet_;
1383     bool bVsites_;
1384     bool bWat_;
1385     bool bPrevWat_;
1386     bool bITP_;
1387     bool bVsiteAromatics_;
1388     real angle_;
1389     real distance_;
1390     real posre_fc_;
1391     real long_bond_dist_;
1392     real short_bond_dist_;
1393
1394     std::string indexOutputFile_;
1395     std::string outputFile_;
1396     std::string topologyFile_;
1397     std::string includeTopologyFile_;
1398     std::string outputConfFile_;
1399     std::string inputConfFile_;
1400     std::string outFile_;
1401     std::string ff_;
1402
1403     ChainSeparationType chainSeparation_;
1404     VSitesType          vsitesType_;
1405     WaterType           waterType_;
1406     MergeType           mergeType_;
1407
1408     FILE*                          itp_file_;
1409     char                           forcefield_[STRLEN];
1410     char                           ffdir_[STRLEN];
1411     char*                          ffname_;
1412     char*                          watermodel_;
1413     std::vector<std::string>       incls_;
1414     std::vector<t_mols>            mols_;
1415     real                           mHmult_;
1416     std::unique_ptr<gmx::MDLogger> loggerPointer_;
1417 };
1418
1419 void pdb2gmx::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
1420 {
1421     const char* desc[] = {
1422         "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1423         "some database files, adds hydrogens to the molecules and generates",
1424         "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1425         "and a topology in GROMACS format.",
1426         "These files can subsequently be processed to generate a run input file.",
1427         "[PAR]",
1428         "[THISMODULE] will search for force fields by looking for",
1429         "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1430         "of the current working directory and of the GROMACS library directory",
1431         "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1432         "variable.",
1433         "By default the forcefield selection is interactive,",
1434         "but you can use the [TT]-ff[tt] option to specify one of the short names",
1435         "in the list on the command line instead. In that case [THISMODULE] just looks",
1436         "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1437         "[PAR]",
1438         "After choosing a force field, all files will be read only from",
1439         "the corresponding force field directory.",
1440         "If you want to modify or add a residue types, you can copy the force",
1441         "field directory from the GROMACS library directory to your current",
1442         "working directory. If you want to add new protein residue types,",
1443         "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1444         "or copy the whole library directory to a local directory and set",
1445         "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1446         "Check Chapter 5 of the manual for more information about file formats.",
1447         "[PAR]",
1448
1449         "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1450         "need not necessarily contain a protein structure. Every kind of",
1451         "molecule for which there is support in the database can be converted.",
1452         "If there is no support in the database, you can add it yourself.[PAR]",
1453
1454         "The program has limited intelligence, it reads a number of database",
1455         "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1456         "if necessary this can be done manually. The program can prompt the",
1457         "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1458         "desired. For Lys the choice is between neutral (two protons on NZ) or",
1459         "protonated (three protons, default), for Asp and Glu unprotonated",
1460         "(default) or protonated, for His the proton can be either on ND1,",
1461         "on NE2 or on both. By default these selections are done automatically.",
1462         "For His, this is based on an optimal hydrogen bonding",
1463         "conformation. Hydrogen bonds are defined based on a simple geometric",
1464         "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1465         "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1466         "[TT]-dist[tt] respectively.[PAR]",
1467
1468         "The protonation state of N- and C-termini can be chosen interactively",
1469         "with the [TT]-ter[tt] flag.  Default termini are ionized (NH3+ and COO-),",
1470         "respectively.  Some force fields support zwitterionic forms for chains of",
1471         "one residue, but for polypeptides these options should NOT be selected.",
1472         "The AMBER force fields have unique forms for the terminal residues,",
1473         "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1474         "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1475         "respectively to use these forms, making sure you preserve the format",
1476         "of the coordinate file. Alternatively, use named terminating residues",
1477         "(e.g. ACE, NME).[PAR]",
1478
1479         "The separation of chains is not entirely trivial since the markup",
1480         "in user-generated PDB files frequently varies and sometimes it",
1481         "is desirable to merge entries across a TER record, for instance",
1482         "if you want a disulfide bridge or distance restraints between",
1483         "two protein chains or if you have a HEME group bound to a protein.",
1484         "In such cases multiple chains should be contained in a single",
1485         "[TT]moleculetype[tt] definition.",
1486         "To handle this, [THISMODULE] uses two separate options.",
1487         "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1488         "start, and termini added when applicable. This can be done based on the",
1489         "existence of TER records, when the chain id changes, or combinations of either",
1490         "or both of these. You can also do the selection fully interactively.",
1491         "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1492         "are merged into one moleculetype, after adding all the chemical termini (or not).",
1493         "This can be turned off (no merging), all non-water chains can be merged into a",
1494         "single molecule, or the selection can be done interactively.[PAR]",
1495
1496         "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1497         "If any of the occupancies are not one, indicating that the atom is",
1498         "not resolved well in the structure, a warning message is issued.",
1499         "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1500         "all occupancy fields may be zero. Either way, it is up to the user",
1501         "to verify the correctness of the input data (read the article!).[PAR]",
1502
1503         "During processing the atoms will be reordered according to GROMACS",
1504         "conventions. With [TT]-n[tt] an index file can be generated that",
1505         "contains one group reordered in the same way. This allows you to",
1506         "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1507         "one limitation: reordering is done after the hydrogens are stripped",
1508         "from the input and before new hydrogens are added. This means that",
1509         "you should not use [TT]-ignh[tt].[PAR]",
1510
1511         "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1512         "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1513         "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1514         "[PAR]",
1515
1516         "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1517         "motions. Angular and out-of-plane motions can be removed by changing",
1518         "hydrogens into virtual sites and fixing angles, which fixes their",
1519         "position relative to neighboring atoms. Additionally, all atoms in the",
1520         "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1521         "can be converted into virtual sites, eliminating the fast improper dihedral",
1522         "fluctuations in these rings (but this feature is deprecated).",
1523         "[BB]Note[bb] that in this case all other hydrogen",
1524         "atoms are also converted to virtual sites. The mass of all atoms that are",
1525         "converted into virtual sites, is added to the heavy atoms.[PAR]",
1526         "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1527         "done by increasing the hydrogen-mass by a factor of 4. This is also",
1528         "done for water hydrogens to slow down the rotational motion of water.",
1529         "The increase in mass of the hydrogens is subtracted from the bonded",
1530         "(heavy) atom so that the total mass of the system remains the same."
1531     };
1532
1533     settings->setHelpText(desc);
1534
1535     options->addOption(BooleanOption("newrtp").store(&bNewRTP_).defaultValue(false).hidden().description(
1536             "Write the residue database in new format to [TT]new.rtp[tt]"));
1537     options->addOption(
1538             RealOption("lb").store(&long_bond_dist_).defaultValue(0.25).hidden().description("Long bond warning distance"));
1539     options->addOption(
1540             RealOption("sb").store(&short_bond_dist_).defaultValue(0.05).hidden().description("Short bond warning distance"));
1541     options->addOption(EnumOption<ChainSeparationType>("chainsep")
1542                                .enumValue(c_chainSeparationTypeNames)
1543                                .store(&chainSeparation_)
1544                                .description("Condition in PDB files when a new chain should be "
1545                                             "started (adding termini)"));
1546     options->addOption(EnumOption<MergeType>("merge")
1547                                .enumValue(c_mergeTypeNames)
1548                                .store(&mergeType_)
1549                                .description("Merge multiple chains into a single [moleculetype]"));
1550     options->addOption(StringOption("ff").store(&ff_).defaultValue("select").description(
1551             "Force field, interactive by default. Use [TT]-h[tt] for information."));
1552     options->addOption(
1553             EnumOption<WaterType>("water").store(&waterType_).enumValue(c_waterTypeNames).description("Water model to use"));
1554     options->addOption(BooleanOption("inter").store(&bInter_).defaultValue(false).description(
1555             "Set the next 8 options to interactive"));
1556     options->addOption(BooleanOption("ss").store(&bCysMan_).defaultValue(false).description(
1557             "Interactive SS bridge selection"));
1558     options->addOption(BooleanOption("ter").store(&bTerMan_).defaultValue(false).description(
1559             "Interactive termini selection, instead of charged (default)"));
1560     options->addOption(BooleanOption("lys").store(&bLysMan_).defaultValue(false).description(
1561             "Interactive lysine selection, instead of charged"));
1562     options->addOption(BooleanOption("arg").store(&bArgMan_).defaultValue(false).description(
1563             "Interactive arginine selection, instead of charged"));
1564     options->addOption(BooleanOption("asp").store(&bAspMan_).defaultValue(false).description(
1565             "Interactive aspartic acid selection, instead of charged"));
1566     options->addOption(BooleanOption("glu").store(&bGluMan_).defaultValue(false).description(
1567             "Interactive glutamic acid selection, instead of charged"));
1568     options->addOption(BooleanOption("gln").store(&bGlnMan_).defaultValue(false).description(
1569             "Interactive glutamine selection, instead of charged"));
1570     options->addOption(BooleanOption("his").store(&bHisMan_).defaultValue(false).description(
1571             "Interactive histidine selection, instead of checking H-bonds"));
1572     options->addOption(RealOption("angle").store(&angle_).defaultValue(135.0).description(
1573             "Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1574     options->addOption(
1575             RealOption("dist").store(&distance_).defaultValue(0.3).description("Maximum donor-acceptor distance for a H-bond (nm)"));
1576     options->addOption(BooleanOption("una").store(&bUnA_).defaultValue(false).description(
1577             "Select aromatic rings with united CH atoms on phenylalanine, tryptophane and "
1578             "tyrosine"));
1579     options->addOption(BooleanOption("sort").store(&bSort_).defaultValue(true).hidden().description(
1580             "Sort the residues according to database, turning this off is dangerous as charge "
1581             "groups might be broken in parts"));
1582     options->addOption(
1583             BooleanOption("ignh").store(&bRemoveH_).defaultValue(false).description("Ignore hydrogen atoms that are in the coordinate file"));
1584     options->addOption(
1585             BooleanOption("missing")
1586                     .store(&bAllowMissing_)
1587                     .defaultValue(false)
1588                     .description(
1589                             "Continue when atoms are missing and bonds cannot be made, dangerous"));
1590     options->addOption(
1591             BooleanOption("v").store(&bVerbose_).defaultValue(false).description("Be slightly more verbose in messages"));
1592     options->addOption(
1593             RealOption("posrefc").store(&posre_fc_).defaultValue(1000).description("Force constant for position restraints"));
1594     options->addOption(EnumOption<VSitesType>("vsite")
1595                                .store(&vsitesType_)
1596                                .enumValue(c_vsitesTypeNames)
1597                                .description("Convert atoms to virtual sites"));
1598     options->addOption(BooleanOption("heavyh").store(&bHeavyH_).defaultValue(false).description(
1599             "Make hydrogen atoms heavy"));
1600     options->addOption(
1601             BooleanOption("deuterate").store(&bDeuterate_).defaultValue(false).description("Change the mass of hydrogens to 2 amu"));
1602     options->addOption(BooleanOption("chargegrp")
1603                                .store(&bChargeGroups_)
1604                                .defaultValue(true)
1605                                .description("Use charge groups in the [REF].rtp[ref] file"));
1606     options->addOption(BooleanOption("cmap").store(&bCmap_).defaultValue(true).description(
1607             "Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1608     options->addOption(BooleanOption("renum")
1609                                .store(&bRenumRes_)
1610                                .defaultValue(false)
1611                                .description("Renumber the residues consecutively in the output"));
1612     options->addOption(BooleanOption("rtpres")
1613                                .store(&bRTPresname_)
1614                                .defaultValue(false)
1615                                .description("Use [REF].rtp[ref] entry names as residue names"));
1616     options->addOption(FileNameOption("f")
1617                                .legacyType(efSTX)
1618                                .inputFile()
1619                                .store(&inputConfFile_)
1620                                .required()
1621                                .defaultBasename("protein")
1622                                .defaultType(efPDB)
1623                                .description("Structure file"));
1624     options->addOption(FileNameOption("o")
1625                                .legacyType(efSTO)
1626                                .outputFile()
1627                                .store(&outputConfFile_)
1628                                .required()
1629                                .defaultBasename("conf")
1630                                .description("Structure file"));
1631     options->addOption(FileNameOption("p")
1632                                .legacyType(efTOP)
1633                                .outputFile()
1634                                .store(&topologyFile_)
1635                                .required()
1636                                .defaultBasename("topol")
1637                                .description("Topology file"));
1638     options->addOption(FileNameOption("i")
1639                                .legacyType(efITP)
1640                                .outputFile()
1641                                .store(&includeTopologyFile_)
1642                                .required()
1643                                .defaultBasename("posre")
1644                                .description("Include file for topology"));
1645     options->addOption(FileNameOption("n")
1646                                .legacyType(efNDX)
1647                                .outputFile()
1648                                .store(&indexOutputFile_)
1649                                .storeIsSet(&bIndexSet_)
1650                                .defaultBasename("index")
1651                                .description("Index file"));
1652     options->addOption(FileNameOption("q")
1653                                .legacyType(efSTO)
1654                                .outputFile()
1655                                .store(&outFile_)
1656                                .storeIsSet(&bOutputSet_)
1657                                .defaultBasename("clean")
1658                                .defaultType(efPDB)
1659                                .description("Structure file"));
1660 }
1661
1662 void pdb2gmx::optionsFinished()
1663 {
1664     if (inputConfFile_.empty())
1665     {
1666         GMX_THROW(InconsistentInputError("You must supply an input file"));
1667     }
1668     if (bInter_)
1669     {
1670         /* if anything changes here, also change description of -inter */
1671         bCysMan_ = true;
1672         bTerMan_ = true;
1673         bLysMan_ = true;
1674         bArgMan_ = true;
1675         bAspMan_ = true;
1676         bGluMan_ = true;
1677         bGlnMan_ = true;
1678         bHisMan_ = true;
1679     }
1680
1681     if (bHeavyH_)
1682     {
1683         mHmult_ = 4.0;
1684     }
1685     else if (bDeuterate_)
1686     {
1687         mHmult_ = 2.0;
1688     }
1689     else
1690     {
1691         mHmult_ = 1.0;
1692     }
1693
1694     /* Force field selection, interactive or direct */
1695     choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(), forcefield_,
1696               sizeof(forcefield_), ffdir_, sizeof(ffdir_), *loggerPointer_);
1697
1698     if (strlen(forcefield_) > 0)
1699     {
1700         ffname_    = forcefield_;
1701         ffname_[0] = std::toupper(ffname_[0]);
1702     }
1703     else
1704     {
1705         gmx_fatal(FARGS, "Empty forcefield string");
1706     }
1707 }
1708
1709 int pdb2gmx::run()
1710 {
1711     char                       select[STRLEN];
1712     std::vector<DisulfideBond> ssbonds;
1713
1714     int         this_atomnum;
1715     int         prev_atomnum;
1716     const char* prev_atomname;
1717     const char* this_atomname;
1718     const char* prev_resname;
1719     const char* this_resname;
1720     int         prev_resnum;
1721     int         this_resnum;
1722     char        prev_chainid;
1723     char        this_chainid;
1724     int         prev_chainnumber;
1725     int         this_chainnumber;
1726     int         this_chainstart;
1727     int         prev_chainstart;
1728
1729     gmx::LoggerBuilder builder;
1730     builder.addTargetStream(gmx::MDLogger::LogLevel::Info, &gmx::TextOutputFile::standardOutput());
1731     builder.addTargetStream(gmx::MDLogger::LogLevel::Warning, &gmx::TextOutputFile::standardError());
1732     gmx::LoggerOwner logOwner(builder.build());
1733     loggerPointer_              = std::make_unique<gmx::MDLogger>(logOwner.logger());
1734     const gmx::MDLogger& logger = *loggerPointer_;
1735
1736     GMX_LOG(logger.info)
1737             .asParagraph()
1738             .appendTextFormatted("Using the %s force field in directory %s", ffname_, ffdir_);
1739
1740     choose_watermodel(c_waterTypeNames[waterType_], ffdir_, &watermodel_, logger);
1741
1742     switch (vsitesType_)
1743     {
1744         case VSitesType::None:
1745             bVsites_         = false;
1746             bVsiteAromatics_ = false;
1747             break;
1748         case VSitesType::Hydrogens:
1749             bVsites_         = true;
1750             bVsiteAromatics_ = false;
1751             break;
1752         case VSitesType::Aromatics:
1753             bVsites_         = true;
1754             bVsiteAromatics_ = true;
1755             break;
1756         case VSitesType::Count:
1757             gmx_fatal(FARGS, "Internal inconsistency: VSitesType='%s'", c_vsitesTypeNames[vsitesType_]);
1758     } /* end switch */
1759
1760     /* Open the symbol table */
1761     t_symtab symtab;
1762     open_symtab(&symtab);
1763
1764     /* Residue type database */
1765     ResidueType rt;
1766
1767     /* Read residue renaming database(s), if present */
1768     std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
1769
1770     std::vector<RtpRename> rtprename;
1771     for (const auto& filename : rrn)
1772     {
1773         GMX_LOG(logger.info).asParagraph().appendTextFormatted("going to rename %s", filename.c_str());
1774         FILE* fp = fflib_open(filename);
1775         read_rtprename(filename.c_str(), fp, &rtprename);
1776         gmx_ffclose(fp);
1777     }
1778
1779     /* Add all alternative names from the residue renaming database to the list
1780        of recognized amino/nucleic acids. */
1781     for (const auto& rename : rtprename)
1782     {
1783         /* Only add names if the 'standard' gromacs/iupac base name was found */
1784         if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
1785         {
1786             rt.addResidue(rename.main, *restype);
1787             rt.addResidue(rename.nter, *restype);
1788             rt.addResidue(rename.cter, *restype);
1789             rt.addResidue(rename.bter, *restype);
1790         }
1791     }
1792
1793     matrix      box;
1794     const char* watres;
1795     clear_mat(box);
1796     if (watermodel_ != nullptr && (strstr(watermodel_, "4p") || strstr(watermodel_, "4P")))
1797     {
1798         watres = "HO4";
1799     }
1800     else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") || strstr(watermodel_, "5P")))
1801     {
1802         watres = "HO5";
1803     }
1804     else
1805     {
1806         watres = "HOH";
1807     }
1808
1809     AtomProperties aps;
1810     char*          title = nullptr;
1811     PbcType        pbcType;
1812     t_atoms        pdba_all;
1813     rvec*          pdbx;
1814     int natom = read_pdball(inputConfFile_.c_str(), bOutputSet_, outFile_.c_str(), &title, &pdba_all,
1815                             &pdbx, &pbcType, box, bRemoveH_, &symtab, &rt, watres, &aps, bVerbose_);
1816
1817     if (natom == 0)
1818     {
1819         std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
1820         GMX_THROW(InconsistentInputError(message));
1821     }
1822
1823     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Analyzing pdb file");
1824     int nwaterchain = 0;
1825
1826     modify_chain_numbers(&pdba_all, chainSeparation_, logger);
1827
1828     int nchainmerges = 0;
1829
1830     this_atomname    = nullptr;
1831     this_atomnum     = -1;
1832     this_resname     = nullptr;
1833     this_resnum      = -1;
1834     this_chainid     = '?';
1835     this_chainnumber = -1;
1836     this_chainstart  = 0;
1837     /* Keep the compiler happy */
1838     prev_chainstart = 0;
1839
1840     int                     numChains = 0;
1841     std::vector<t_pdbchain> pdb_ch;
1842
1843     t_resinfo* ri;
1844     bool       bMerged = false;
1845     for (int i = 0; (i < natom); i++)
1846     {
1847         ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1848
1849         /* TODO this should live in a helper object, and consolidate
1850            that with code in modify_chain_numbers */
1851         prev_atomname    = this_atomname;
1852         prev_atomnum     = this_atomnum;
1853         prev_resname     = this_resname;
1854         prev_resnum      = this_resnum;
1855         prev_chainid     = this_chainid;
1856         prev_chainnumber = this_chainnumber;
1857         if (!bMerged)
1858         {
1859             prev_chainstart = this_chainstart;
1860         }
1861
1862         this_atomname    = *pdba_all.atomname[i];
1863         this_atomnum     = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i + 1;
1864         this_resname     = *ri->name;
1865         this_resnum      = ri->nr;
1866         this_chainid     = ri->chainid;
1867         this_chainnumber = ri->chainnum;
1868
1869         bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
1870
1871         if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
1872         {
1873             GMX_RELEASE_ASSERT(
1874                     pdba_all.pdbinfo,
1875                     "Must have pdbinfo from reading a PDB file if chain number is changing");
1876             this_chainstart = pdba_all.atom[i].resind;
1877             bMerged         = false;
1878             if (i > 0 && !bWat_)
1879             {
1880                 if (!strncmp(c_mergeTypeNames[mergeType_], "int", 3))
1881                 {
1882                     GMX_LOG(logger.info)
1883                             .asParagraph()
1884                             .appendTextFormatted(
1885                                     "Merge chain ending with residue %s%d (chain id '%c', atom %d "
1886                                     "%s) and chain starting with "
1887                                     "residue %s%d (chain id '%c', atom %d %s) into a single "
1888                                     "moleculetype (keeping termini)? [n/y]",
1889                                     prev_resname, prev_resnum, prev_chainid, prev_atomnum,
1890                                     prev_atomname, this_resname, this_resnum, this_chainid,
1891                                     this_atomnum, this_atomname);
1892
1893                     if (nullptr == fgets(select, STRLEN - 1, stdin))
1894                     {
1895                         gmx_fatal(FARGS, "Error reading from stdin");
1896                     }
1897                     bMerged = (select[0] == 'y');
1898                 }
1899                 else if (!strncmp(c_mergeTypeNames[mergeType_], "all", 3))
1900                 {
1901                     bMerged = true;
1902                 }
1903             }
1904
1905             if (bMerged)
1906             {
1907                 pdb_ch[numChains - 1].chainstart[pdb_ch[numChains - 1].nterpairs] =
1908                         pdba_all.atom[i].resind - prev_chainstart;
1909                 pdb_ch[numChains - 1].nterpairs++;
1910                 pdb_ch[numChains - 1].chainstart.resize(pdb_ch[numChains - 1].nterpairs + 1);
1911                 nchainmerges++;
1912             }
1913             else
1914             {
1915                 /* set natom for previous chain */
1916                 if (numChains > 0)
1917                 {
1918                     pdb_ch[numChains - 1].natom = i - pdb_ch[numChains - 1].start;
1919                 }
1920                 if (bWat_)
1921                 {
1922                     nwaterchain++;
1923                     ri->chainid = ' ';
1924                 }
1925                 /* check if chain identifier was used before */
1926                 for (int j = 0; (j < numChains); j++)
1927                 {
1928                     if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1929                     {
1930                         GMX_LOG(logger.warning)
1931                                 .asParagraph()
1932                                 .appendTextFormatted(
1933                                         "Chain identifier '%c' is used in two non-sequential "
1934                                         "blocks. "
1935                                         "They will be treated as separate chains unless you "
1936                                         "reorder your file.",
1937                                         ri->chainid);
1938                     }
1939                 }
1940                 t_pdbchain newChain;
1941                 newChain.chainid  = ri->chainid;
1942                 newChain.chainnum = ri->chainnum;
1943                 newChain.start    = i;
1944                 newChain.bAllWat  = bWat_;
1945                 if (bWat_)
1946                 {
1947                     newChain.nterpairs = 0;
1948                 }
1949                 else
1950                 {
1951                     newChain.nterpairs = 1;
1952                 }
1953                 newChain.chainstart.resize(newChain.nterpairs + 1);
1954                 /* modified [numChains] to [0] below */
1955                 newChain.chainstart[0] = 0;
1956                 pdb_ch.push_back(newChain);
1957                 numChains++;
1958             }
1959         }
1960         bPrevWat_ = bWat_;
1961     }
1962     pdb_ch.back().natom = natom - pdb_ch.back().start;
1963
1964     /* set all the water blocks at the end of the chain */
1965     std::vector<int> swap_index(numChains);
1966     int              j = 0;
1967     for (int i = 0; i < numChains; i++)
1968     {
1969         if (!pdb_ch[i].bAllWat)
1970         {
1971             swap_index[j] = i;
1972             j++;
1973         }
1974     }
1975     for (int i = 0; i < numChains; i++)
1976     {
1977         if (pdb_ch[i].bAllWat)
1978         {
1979             swap_index[j] = i;
1980             j++;
1981         }
1982     }
1983     if (nwaterchain > 1)
1984     {
1985         GMX_LOG(logger.info)
1986                 .asParagraph()
1987                 .appendTextFormatted("Moved all the water blocks to the end");
1988     }
1989
1990     t_atoms*             pdba;
1991     std::vector<t_chain> chains(numChains);
1992     /* copy pdb data and x for all chains */
1993     for (int i = 0; (i < numChains); i++)
1994     {
1995         int si               = swap_index[i];
1996         chains[i].chainid    = pdb_ch[si].chainid;
1997         chains[i].chainnum   = pdb_ch[si].chainnum;
1998         chains[i].bAllWat    = pdb_ch[si].bAllWat;
1999         chains[i].nterpairs  = pdb_ch[si].nterpairs;
2000         chains[i].chainstart = pdb_ch[si].chainstart;
2001         chains[i].ntdb.clear();
2002         chains[i].ctdb.clear();
2003         chains[i].r_start.resize(pdb_ch[si].nterpairs);
2004         chains[i].r_end.resize(pdb_ch[si].nterpairs);
2005
2006         snew(chains[i].pdba, 1);
2007         init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
2008         for (j = 0; j < chains[i].pdba->nr; j++)
2009         {
2010             chains[i].pdba->atom[j]     = pdba_all.atom[pdb_ch[si].start + j];
2011             chains[i].pdba->atomname[j] = put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start + j]);
2012             chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start + j];
2013             chains[i].x.emplace_back(pdbx[pdb_ch[si].start + j]);
2014         }
2015         /* Re-index the residues assuming that the indices are continuous */
2016         int k                = chains[i].pdba->atom[0].resind;
2017         int nres             = chains[i].pdba->atom[chains[i].pdba->nr - 1].resind - k + 1;
2018         chains[i].pdba->nres = nres;
2019         for (int j = 0; j < chains[i].pdba->nr; j++)
2020         {
2021             chains[i].pdba->atom[j].resind -= k;
2022         }
2023         srenew(chains[i].pdba->resinfo, nres);
2024         for (int j = 0; j < nres; j++)
2025         {
2026             chains[i].pdba->resinfo[j]      = pdba_all.resinfo[k + j];
2027             chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k + j].name);
2028             /* make all chain identifiers equal to that of the chain */
2029             chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
2030         }
2031     }
2032
2033     if (nchainmerges > 0)
2034     {
2035         GMX_LOG(logger.info)
2036                 .asParagraph()
2037                 .appendTextFormatted("Merged chains into joint molecule definitions at %d places.",
2038                                      nchainmerges);
2039     }
2040
2041     GMX_LOG(logger.info)
2042             .asParagraph()
2043             .appendTextFormatted(
2044                     "There are %d chains and %d blocks of water and "
2045                     "%d residues with %d atoms",
2046                     numChains - nwaterchain, nwaterchain, pdba_all.nres, natom);
2047
2048     GMX_LOG(logger.info)
2049             .asParagraph()
2050             .appendTextFormatted("  %5s  %4s %6s", "chain", "#res", "#atoms");
2051     for (int i = 0; (i < numChains); i++)
2052     {
2053         GMX_LOG(logger.info)
2054                 .asParagraph()
2055                 .appendTextFormatted("  %d '%c' %5d %6d  %s\n", i + 1,
2056                                      chains[i].chainid ? chains[i].chainid : '-', chains[i].pdba->nres,
2057                                      chains[i].pdba->nr, chains[i].bAllWat ? "(only water)" : "");
2058     }
2059
2060     check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_, logger);
2061
2062     /* Read atomtypes... */
2063     PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
2064
2065     /* read residue database */
2066     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Reading residue database... (%s)", forcefield_);
2067     std::vector<std::string>       rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
2068     std::vector<PreprocessResidue> rtpFFDB;
2069     for (const auto& filename : rtpf)
2070     {
2071         readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, logger, false);
2072     }
2073     if (bNewRTP_)
2074     {
2075         /* Not correct with multiple rtp input files with different bonded types */
2076         FILE* fp = gmx_fio_fopen("new.rtp", "w");
2077         print_resall(fp, rtpFFDB, atype);
2078         gmx_fio_fclose(fp);
2079     }
2080
2081     /* read hydrogen database */
2082     std::vector<MoleculePatchDatabase> ah;
2083     read_h_db(ffdir_, &ah);
2084
2085     /* Read Termini database... */
2086     std::vector<MoleculePatchDatabase>  ntdb;
2087     std::vector<MoleculePatchDatabase>  ctdb;
2088     std::vector<MoleculePatchDatabase*> tdblist;
2089     int                                 nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
2090     int                                 nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
2091
2092     FILE* top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
2093
2094     print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
2095
2096     t_chain*               cc;
2097     std::vector<gmx::RVec> x;
2098     /* new pdb datastructure for sorting. */
2099     t_atoms** sortAtoms  = nullptr;
2100     t_atoms** localAtoms = nullptr;
2101     snew(sortAtoms, numChains);
2102     snew(localAtoms, numChains);
2103     for (int chain = 0; (chain < numChains); chain++)
2104     {
2105         cc = &(chains[chain]);
2106
2107         /* set pdba, natom and nres to the current chain */
2108         pdba     = cc->pdba;
2109         x        = cc->x;
2110         natom    = cc->pdba->nr;
2111         int nres = cc->pdba->nres;
2112
2113         if (cc->chainid && (cc->chainid != ' '))
2114         {
2115             GMX_LOG(logger.info)
2116                     .asParagraph()
2117                     .appendTextFormatted("Processing chain %d '%c' (%d atoms, %d residues)",
2118                                          chain + 1, cc->chainid, natom, nres);
2119         }
2120         else
2121         {
2122             GMX_LOG(logger.info)
2123                     .asParagraph()
2124                     .appendTextFormatted("Processing chain %d (%d atoms, %d residues)", chain + 1,
2125                                          natom, nres);
2126         }
2127
2128         process_chain(pdba, x, bUnA_, bUnA_, bUnA_, bLysMan_, bAspMan_, bGluMan_, bHisMan_,
2129                       bArgMan_, bGlnMan_, angle_, distance_, &symtab, rtprename);
2130
2131         cc->chainstart[cc->nterpairs] = pdba->nres;
2132         j                             = 0;
2133         for (int i = 0; i < cc->nterpairs; i++)
2134         {
2135             find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i + 1], &(cc->r_start[j]),
2136                         &(cc->r_end[j]), &rt, logger);
2137
2138             if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
2139             {
2140                 j++;
2141             }
2142         }
2143         cc->nterpairs = j;
2144         if (cc->nterpairs == 0)
2145         {
2146             GMX_LOG(logger.info)
2147                     .asParagraph()
2148                     .appendTextFormatted(
2149                             "Problem with chain definition, or missing terminal residues. "
2150                             "This chain does not appear to contain a recognized chain molecule. "
2151                             "If this is incorrect, you can edit residuetypes.dat to modify the "
2152                             "behavior.");
2153         }
2154
2155         /* Check for disulfides and other special bonds */
2156         ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2157
2158         if (!rtprename.empty())
2159         {
2160             rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename, &symtab, bVerbose_, logger);
2161         }
2162
2163         for (int i = 0; i < cc->nterpairs; i++)
2164         {
2165             /* Set termini.
2166              * We first apply a filter so we only have the
2167              * termini that can be applied to the residue in question
2168              * (or a generic terminus if no-residue specific is available).
2169              */
2170             /* First the N terminus */
2171             if (nNtdb > 0)
2172             {
2173                 tdblist = filter_ter(ntdb, *pdba->resinfo[cc->r_start[i]].name);
2174                 if (tdblist.empty())
2175                 {
2176                     GMX_LOG(logger.info)
2177                             .asParagraph()
2178                             .appendTextFormatted(
2179                                     "No suitable end (N or 5') terminus found in database - "
2180                                     "assuming this residue "
2181                                     "is already in a terminus-specific form and skipping terminus "
2182                                     "selection.");
2183                     cc->ntdb.push_back(nullptr);
2184                 }
2185                 else
2186                 {
2187                     if (bTerMan_ && !tdblist.empty())
2188                     {
2189                         sprintf(select, "Select start terminus type for %s-%d",
2190                                 *pdba->resinfo[cc->r_start[i]].name, pdba->resinfo[cc->r_start[i]].nr);
2191                         cc->ntdb.push_back(choose_ter(tdblist, select));
2192                     }
2193                     else
2194                     {
2195                         cc->ntdb.push_back(tdblist[0]);
2196                     }
2197
2198                     printf("Start terminus %s-%d: %s\n", *pdba->resinfo[cc->r_start[i]].name,
2199                            pdba->resinfo[cc->r_start[i]].nr, (cc->ntdb[i])->name.c_str());
2200                     tdblist.clear();
2201                 }
2202             }
2203             else
2204             {
2205                 cc->ntdb.push_back(nullptr);
2206             }
2207
2208             /* And the C terminus */
2209             if (nCtdb > 0)
2210             {
2211                 tdblist = filter_ter(ctdb, *pdba->resinfo[cc->r_end[i]].name);
2212                 if (tdblist.empty())
2213                 {
2214                     GMX_LOG(logger.info)
2215                             .asParagraph()
2216                             .appendTextFormatted(
2217                                     "No suitable end (C or 3') terminus found in database - "
2218                                     "assuming this residue"
2219                                     "is already in a terminus-specific form and skipping terminus "
2220                                     "selection.");
2221                     cc->ctdb.push_back(nullptr);
2222                 }
2223                 else
2224                 {
2225                     if (bTerMan_ && !tdblist.empty())
2226                     {
2227                         sprintf(select, "Select end terminus type for %s-%d",
2228                                 *pdba->resinfo[cc->r_end[i]].name, pdba->resinfo[cc->r_end[i]].nr);
2229                         cc->ctdb.push_back(choose_ter(tdblist, select));
2230                     }
2231                     else
2232                     {
2233                         cc->ctdb.push_back(tdblist[0]);
2234                     }
2235                     printf("End terminus %s-%d: %s\n", *pdba->resinfo[cc->r_end[i]].name,
2236                            pdba->resinfo[cc->r_end[i]].nr, (cc->ctdb[i])->name.c_str());
2237                     tdblist.clear();
2238                 }
2239             }
2240             else
2241             {
2242                 cc->ctdb.push_back(nullptr);
2243             }
2244         }
2245         std::vector<MoleculePatchDatabase> hb_chain;
2246         /* lookup hackblocks and rtp for all residues */
2247         std::vector<PreprocessResidue> restp_chain;
2248         get_hackblocks_rtp(&hb_chain, &restp_chain, rtpFFDB, pdba->nres, pdba->resinfo, cc->nterpairs,
2249                            &symtab, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing_, logger);
2250         /* ideally, now we would not need the rtp itself anymore, but do
2251            everything using the hb and restp arrays. Unfortunately, that
2252            requires some re-thinking of code in gen_vsite.c, which I won't
2253            do now :( AF 26-7-99 */
2254
2255         rename_atoms(nullptr, ffdir_, pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2256
2257         match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_, logger);
2258
2259         if (bSort_)
2260         {
2261             char**    gnames;
2262             t_blocka* block = new_blocka();
2263             snew(gnames, 1);
2264             sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2265             remove_duplicate_atoms(pdba, x, bVerbose_, logger);
2266             if (bIndexSet_)
2267             {
2268                 if (bRemoveH_)
2269                 {
2270                     GMX_LOG(logger.warning)
2271                             .asParagraph()
2272                             .appendTextFormatted(
2273                                     "With the -remh option the generated "
2274                                     "index file (%s) might be useless "
2275                                     "(the index file is generated before hydrogens are added)",
2276                                     indexOutputFile_.c_str());
2277                 }
2278                 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2279             }
2280             for (int i = 0; i < block->nr; i++)
2281             {
2282                 sfree(gnames[i]);
2283             }
2284             sfree(gnames);
2285             done_blocka(block);
2286             sfree(block);
2287         }
2288         else
2289         {
2290             GMX_LOG(logger.warning)
2291                     .asParagraph()
2292                     .appendTextFormatted(
2293                             "Without sorting no check for duplicate atoms can be done");
2294         }
2295
2296         /* Generate Hydrogen atoms (and termini) in the sequence */
2297         GMX_LOG(logger.info)
2298                 .asParagraph()
2299                 .appendTextFormatted(
2300                         "Generating any missing hydrogen atoms and/or adding termini.");
2301         add_h(&pdba, &localAtoms[chain], &x, ah, &symtab, cc->nterpairs, cc->ntdb, cc->ctdb,
2302               cc->r_start, cc->r_end, bAllowMissing_);
2303         GMX_LOG(logger.info)
2304                 .asParagraph()
2305                 .appendTextFormatted("Now there are %d residues with %d atoms", pdba->nres, pdba->nr);
2306
2307         /* make up molecule name(s) */
2308
2309         int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2310
2311         std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2312
2313         std::string molname;
2314         std::string suffix;
2315         if (cc->bAllWat)
2316         {
2317             molname = "Water";
2318         }
2319         else
2320         {
2321             this_chainid = cc->chainid;
2322
2323             /* Add the chain id if we have one */
2324             if (this_chainid != ' ')
2325             {
2326                 suffix.append(formatString("_chain_%c", this_chainid));
2327             }
2328
2329             /* Check if there have been previous chains with the same id */
2330             int nid_used = 0;
2331             for (int k = 0; k < chain; k++)
2332             {
2333                 if (cc->chainid == chains[k].chainid)
2334                 {
2335                     nid_used++;
2336                 }
2337             }
2338             /* Add the number for this chain identifier if there are multiple copies */
2339             if (nid_used > 0)
2340             {
2341                 suffix.append(formatString("%d", nid_used + 1));
2342             }
2343
2344             if (suffix.length() > 0)
2345             {
2346                 molname.append(restype);
2347                 molname.append(suffix);
2348             }
2349             else
2350             {
2351                 molname = restype;
2352             }
2353         }
2354         std::string itp_fn = topologyFile_;
2355
2356         std::string posre_fn = includeTopologyFile_;
2357         if ((numChains - nwaterchain > 1) && !cc->bAllWat)
2358         {
2359             bITP_ = true;
2360             printf("Chain time...\n");
2361             // construct the itp file name
2362             itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2363             itp_fn.append("_");
2364             itp_fn.append(molname);
2365             itp_fn.append(".itp");
2366             // now do the same for posre
2367             posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2368             posre_fn.append("_");
2369             posre_fn.append(molname);
2370             posre_fn.append(".itp");
2371             if (posre_fn == itp_fn)
2372             {
2373                 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2374             }
2375             incls_.emplace_back();
2376             incls_.back() = itp_fn;
2377             itp_file_     = gmx_fio_fopen(itp_fn.c_str(), "w");
2378         }
2379         else
2380         {
2381             bITP_ = false;
2382         }
2383
2384         mols_.emplace_back();
2385         if (cc->bAllWat)
2386         {
2387             mols_.back().name = "SOL";
2388             mols_.back().nr   = pdba->nres;
2389         }
2390         else
2391         {
2392             mols_.back().name = molname;
2393             mols_.back().nr   = 1;
2394         }
2395
2396         if (bITP_)
2397         {
2398             print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2399         }
2400
2401         FILE* top_file2;
2402         if (cc->bAllWat)
2403         {
2404             top_file2 = nullptr;
2405         }
2406         else if (bITP_)
2407         {
2408             top_file2 = itp_file_;
2409         }
2410         else
2411         {
2412             top_file2 = top_file;
2413         }
2414
2415         pdb2top(top_file2, posre_fn.c_str(), molname.c_str(), pdba, &x, &atype, &symtab, rtpFFDB,
2416                 restp_chain, hb_chain, bAllowMissing_, bVsites_, bVsiteAromatics_, ffdir_, mHmult_,
2417                 ssbonds, long_bond_dist_, short_bond_dist_, bDeuterate_, bChargeGroups_, bCmap_,
2418                 bRenumRes_, bRTPresname_, logger);
2419
2420         if (!cc->bAllWat)
2421         {
2422             write_posres(posre_fn.c_str(), pdba, posre_fc_);
2423         }
2424
2425         if (bITP_)
2426         {
2427             gmx_fio_fclose(itp_file_);
2428         }
2429
2430         /* pdba and natom have been reassigned somewhere so: */
2431         cc->pdba = pdba;
2432         cc->x    = x;
2433     }
2434
2435     if (watermodel_ == nullptr)
2436     {
2437         for (int chain = 0; chain < numChains; chain++)
2438         {
2439             if (chains[chain].bAllWat)
2440             {
2441                 auto message = formatString(
2442                         "You have chosen not to include a water model, "
2443                         "but there is water in the input file. Select a "
2444                         "water model or remove the water from your input file.");
2445                 GMX_THROW(InconsistentInputError(message));
2446             }
2447         }
2448     }
2449     else
2450     {
2451         std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2452         if (!fflib_fexist(waterFile))
2453         {
2454             auto message = formatString(
2455                     "The topology file '%s' for the selected water "
2456                     "model '%s' can not be found in the force field "
2457                     "directory. Select a different water model.",
2458                     waterFile.c_str(), watermodel_);
2459             GMX_THROW(InconsistentInputError(message));
2460         }
2461     }
2462
2463     print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2464     gmx_fio_fclose(top_file);
2465
2466     /* now merge all chains back together */
2467     natom    = 0;
2468     int nres = 0;
2469     for (int i = 0; (i < numChains); i++)
2470     {
2471         natom += chains[i].pdba->nr;
2472         nres += chains[i].pdba->nres;
2473     }
2474     t_atoms* atoms;
2475     snew(atoms, 1);
2476     init_t_atoms(atoms, natom, false);
2477     for (int i = 0; i < atoms->nres; i++)
2478     {
2479         sfree(atoms->resinfo[i].name);
2480     }
2481     atoms->nres = nres;
2482     srenew(atoms->resinfo, nres);
2483     x.clear();
2484     int k = 0;
2485     int l = 0;
2486     for (int i = 0; (i < numChains); i++)
2487     {
2488         if (numChains > 1)
2489         {
2490             GMX_LOG(logger.info)
2491                     .asParagraph()
2492                     .appendTextFormatted("Including chain %d in system: %d atoms %d residues",
2493                                          i + 1, chains[i].pdba->nr, chains[i].pdba->nres);
2494         }
2495         for (int j = 0; (j < chains[i].pdba->nr); j++)
2496         {
2497             atoms->atom[k] = chains[i].pdba->atom[j];
2498             atoms->atom[k].resind += l; /* l is processed nr of residues */
2499             atoms->atomname[k]                            = chains[i].pdba->atomname[j];
2500             atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2501             x.push_back(chains[i].x[j]);
2502             k++;
2503         }
2504         for (int j = 0; (j < chains[i].pdba->nres); j++)
2505         {
2506             atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2507             if (bRTPresname_)
2508             {
2509                 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2510             }
2511             l++;
2512         }
2513         done_atom(chains[i].pdba);
2514     }
2515
2516     if (numChains > 1)
2517     {
2518         GMX_LOG(logger.info)
2519                 .asParagraph()
2520                 .appendTextFormatted("Now there are %d atoms and %d residues", k, l);
2521         print_sums(atoms, true, logger);
2522     }
2523
2524     rvec box_space;
2525     GMX_LOG(logger.info).asParagraph().appendTextFormatted("Writing coordinate file...");
2526     clear_rvec(box_space);
2527     if (box[0][0] == 0)
2528     {
2529         make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2530     }
2531     write_sto_conf(outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr,
2532                    pbcType, box);
2533
2534     done_symtab(&symtab);
2535     done_atom(&pdba_all);
2536     done_atom(atoms);
2537     for (int chain = 0; chain < numChains; chain++)
2538     {
2539         sfree(sortAtoms[chain]);
2540         sfree(localAtoms[chain]);
2541     }
2542     sfree(sortAtoms);
2543     sfree(localAtoms);
2544     sfree(atoms);
2545     sfree(title);
2546     sfree(pdbx);
2547
2548     GMX_LOG(logger.info)
2549             .asParagraph()
2550             .appendTextFormatted("\t\t--------- PLEASE NOTE ------------");
2551     GMX_LOG(logger.info)
2552             .asParagraph()
2553             .appendTextFormatted("You have successfully generated a topology from: %s.",
2554                                  inputConfFile_.c_str());
2555     if (watermodel_ != nullptr)
2556     {
2557         GMX_LOG(logger.info)
2558                 .asParagraph()
2559                 .appendTextFormatted("The %s force field and the %s water model are used.", ffname_,
2560                                      watermodel_);
2561         sfree(watermodel_);
2562     }
2563     else
2564     {
2565         GMX_LOG(logger.info).asParagraph().appendTextFormatted("The %s force field is used.", ffname_);
2566     }
2567     GMX_LOG(logger.info)
2568             .asParagraph()
2569             .appendTextFormatted("\t\t--------- ETON ESAELP ------------");
2570
2571     return 0;
2572 }
2573
2574 } // namespace
2575
2576 const char pdb2gmxInfo::name[] = "pdb2gmx";
2577 const char pdb2gmxInfo::shortDescription[] =
2578         "Convert coordinate files to topology and FF-compliant coordinate files";
2579 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2580 {
2581     return std::make_unique<pdb2gmx>();
2582 }
2583
2584 } // namespace gmx