src/gromacs/gmxpreprocess/ter_db.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
   7  * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "ter_db.h"
  41
  42 #include <cctype>
  43 #include <cstring>
  44
  45 #include <algorithm>
  46 #include <string>
  47 #include <vector>
  48
  49 #include "gromacs/fileio/gmxfio.h"
  50 #include "gromacs/gmxpreprocess/fflibutil.h"
  51 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
  52 #include "gromacs/gmxpreprocess/grompp_impl.h"
  53 #include "gromacs/gmxpreprocess/h_db.h"
  54 #include "gromacs/gmxpreprocess/notset.h"
  55 #include "gromacs/gmxpreprocess/toputil.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/futil.h"
  59 #include "gromacs/utility/enumerationhelpers.h"
  60 #include "gromacs/utility/fatalerror.h"
  61 #include "gromacs/utility/futil.h"
  62 #include "gromacs/utility/smalloc.h"
  63 #include "gromacs/utility/strdb.h"
  64 #include "gromacs/utility/stringtoenumvalueconverter.h"
  65
  66 #include "hackblock.h"
  67 #include "resall.h"
  68
  69 enum class ReplaceType : int
  70 {
  71     Repl,
  72     Add,
  73     Del,
  74     Count
  75 };
  76
  77 static const char* enumValueToString(ReplaceType enumValue)
  78 {
  79     constexpr gmx::EnumerationArray<ReplaceType, const char*> replaceTypeNames = { "replace",
  80                                                                                    "add",
  81                                                                                    "delete" };
  82     return replaceTypeNames[enumValue];
  83 }
  84
  85 template<typename EnumType>
  86 static std::optional<EnumType> findTypeFromKeyword(char* keyw)
  87 {
  88     gmx::StringToEnumValueConverter<EnumType, enumValueToString, gmx::StringCompareType::CaseInsensitive, gmx::StripStrings::Yes> converter;
  89     return converter.valueFrom(keyw);
  90 }
  91
  92 #define FATAL() gmx_fatal(FARGS, "Reading Termini Database: not enough items on line\n%s", line)
  93
  94 static void read_atom(char* line, bool bAdd, std::string* nname, t_atom* a, PreprocessingAtomTypes* atype, int* cgnr)
  95 {
  96     int    nr, i;
  97     char   buf[5][30];
  98     double m, q;
  99
 100     /* This code is messy, because of support for different formats:
 101      * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
 102      * for add:                <atom type> <m> <q> [cgnr]
 103      */
 104     nr = sscanf(line, "%s %s %s %s %s", buf[0], buf[1], buf[2], buf[3], buf[4]);
 105
 106     /* Here there an ambiguity due to the old replace format with cgnr,
 107      * which was read for years, but ignored in the rest of the code.
 108      * We have to assume that the atom type does not start with a digit
 109      * to make a line with 4 entries uniquely interpretable.
 110      */
 111     if (!bAdd && nr == 4 && isdigit(buf[1][0]))
 112     {
 113         nr = 3;
 114     }
 115
 116     if (nr < 3 || nr > 4)
 117     {
 118         gmx_fatal(FARGS,
 119                   "Reading Termini Database: expected %d or %d items of atom data in stead of %d "
 120                   "on line\n%s",
 121                   3,
 122                   4,
 123                   nr,
 124                   line);
 125     }
 126     i = 0;
 127     if (!bAdd)
 128     {
 129         if (nr == 4)
 130         {
 131             *nname = buf[i++];
 132         }
 133         else
 134         {
 135             *nname = "";
 136         }
 137     }
 138     a->type = *atype->atomTypeFromName(buf[i++]);
 139     sscanf(buf[i++], "%lf", &m);
 140     a->m = m;
 141     sscanf(buf[i++], "%lf", &q);
 142     a->q = q;
 143     if (bAdd && nr == 4)
 144     {
 145         sscanf(buf[i++], "%d", cgnr);
 146     }
 147     else
 148     {
 149         *cgnr = NOTSET;
 150     }
 151 }
 152
 153 static void print_atom(FILE* out, const t_atom& a, PreprocessingAtomTypes* atype)
 154 {
 155     fprintf(out, "\t%s\t%g\t%g\n", *atype->atomNameFromAtomType(a.type), a.m, a.q);
 156 }
 157
 158 static void print_ter_db(const char*                                ff,
 159                          char                                       C,
 160                          gmx::ArrayRef<const MoleculePatchDatabase> tb,
 161                          PreprocessingAtomTypes*                    atype)
 162 {
 163     std::string buf = gmx::formatString("%s-%c.tdb", ff, C);
 164     FILE*       out = gmx_fio_fopen(buf.c_str(), "w");
 165
 166     for (const auto& modification : tb)
 167     {
 168         fprintf(out, "[ %s ]\n", modification.name.c_str());
 169
 170         if (std::any_of(modification.hack.begin(), modification.hack.end(), [](const auto& mod) {
 171                 return mod.type() == MoleculePatchType::Replace;
 172             }))
 173         {
 174             fprintf(out, "[ %s ]\n", enumValueToString(ReplaceType::Repl));
 175             for (const auto& hack : modification.hack)
 176             {
 177                 if (hack.type() == MoleculePatchType::Replace)
 178                 {
 179                     fprintf(out, "%s\t", hack.oname.c_str());
 180                     print_atom(out, hack.atom.back(), atype);
 181                 }
 182             }
 183         }
 184         if (std::any_of(modification.hack.begin(), modification.hack.end(), [](const auto& mod) {
 185                 return mod.type() == MoleculePatchType::Add;
 186             }))
 187         {
 188             fprintf(out, "[ %s ]\n", enumValueToString(ReplaceType::Add));
 189             for (const auto& hack : modification.hack)
 190             {
 191                 if (hack.type() == MoleculePatchType::Add)
 192                 {
 193                     print_ab(out, hack, hack.nname.c_str());
 194                     print_atom(out, hack.atom.back(), atype);
 195                 }
 196             }
 197         }
 198         if (std::any_of(modification.hack.begin(), modification.hack.end(), [](const auto& mod) {
 199                 return mod.type() == MoleculePatchType::Delete;
 200             }))
 201         {
 202             fprintf(out, "[ %s ]\n", enumValueToString(ReplaceType::Del));
 203             for (const auto& hack : modification.hack)
 204             {
 205                 if (hack.type() == MoleculePatchType::Delete)
 206                 {
 207                     fprintf(out, "%s\n", hack.oname.c_str());
 208                 }
 209             }
 210         }
 211         for (auto bt : gmx::EnumerationWrapper<BondedTypes>{})
 212         {
 213             if (!modification.rb[bt].b.empty())
 214             {
 215                 fprintf(out, "[ %s ]\n", enumValueToString(bt));
 216                 for (const auto& b : modification.rb[bt].b)
 217                 {
 218                     for (int k = 0; k < enumValueToNumIAtoms(bt); k++)
 219                     {
 220                         fprintf(out, "%s%s", k ? "\t" : "", b.a[k].c_str());
 221                     }
 222                     if (!b.s.empty())
 223                     {
 224                         fprintf(out, "\t%s", b.s.c_str());
 225                     }
 226                     fprintf(out, "\n");
 227                 }
 228             }
 229         }
 230         fprintf(out, "\n");
 231     }
 232     gmx_fio_fclose(out);
 233 }
 234
 235 static void read_ter_db_file(const char*                         fn,
 236                              std::vector<MoleculePatchDatabase>* tbptr,
 237                              PreprocessingAtomTypes*             atype)
 238 {
 239     char filebase[STRLEN];
 240     char header[STRLEN], buf[STRLEN], line[STRLEN];
 241
 242     fflib_filename_base(fn, filebase, STRLEN);
 243     /* Remove the C/N termini extension */
 244     char* ptr = strrchr(filebase, '.');
 245     if (ptr != nullptr)
 246     {
 247         ptr[0] = '\0';
 248     }
 249
 250     FILE* in = fflib_open(fn);
 251
 252     std::optional<BondedTypes> btkw;
 253     std::optional<ReplaceType> rtkw;
 254     get_a_line(in, line, STRLEN);
 255     MoleculePatchDatabase* block = nullptr;
 256     while (!feof(in))
 257     {
 258         if (get_header(line, header))
 259         {
 260             /* this is a new block, or a new keyword */
 261             btkw = findTypeFromKeyword<BondedTypes>(header);
 262             rtkw = findTypeFromKeyword<ReplaceType>(header);
 263
 264             if (!btkw.has_value() && !rtkw.has_value())
 265             {
 266                 tbptr->emplace_back(MoleculePatchDatabase());
 267                 block = &tbptr->back();
 268                 clearModificationBlock(block);
 269                 block->name     = header;
 270                 block->filebase = filebase;
 271             }
 272         }
 273         else
 274         {
 275             if (block == nullptr)
 276             {
 277                 gmx_fatal(FARGS,
 278                           "reading termini database: "
 279                           "directive expected before line:\n%s\n"
 280                           "This might be a file in an old format.",
 281                           line);
 282             }
 283             /* this is not a header, so it must be data */
 284             if (!btkw.has_value())
 285             {
 286                 /* this is a hack: add/rename/delete atoms */
 287                 /* make space for hacks */
 288                 block->hack.emplace_back(MoleculePatch());
 289                 MoleculePatch* hack = &block->hack.back();
 290
 291                 /* get data */
 292                 int n = 0;
 293                 if (*rtkw == ReplaceType::Repl || *rtkw == ReplaceType::Del)
 294                 {
 295                     if (sscanf(line, "%s%n", buf, &n) != 1)
 296                     {
 297                         gmx_fatal(FARGS,
 298                                   "Reading Termini Database '%s': "
 299                                   "expected atom name on line\n%s",
 300                                   fn,
 301                                   line);
 302                     }
 303                     hack->oname = buf;
 304                     /* we only replace or delete one atom at a time */
 305                     hack->nr = 1;
 306                 }
 307                 else if (*rtkw == ReplaceType::Add)
 308                 {
 309                     read_ab(line, fn, hack);
 310                     get_a_line(in, line, STRLEN);
 311                 }
 312                 else
 313                 {
 314                     gmx_fatal(FARGS,
 315                               "unimplemented keyword number %d (%s:%d)",
 316                               static_cast<int>(*rtkw),
 317                               __FILE__,
 318                               __LINE__);
 319                 }
 320                 if (*rtkw == ReplaceType::Repl || *rtkw == ReplaceType::Add)
 321                 {
 322                     hack->atom.emplace_back();
 323                     read_atom(line + n, *rtkw == ReplaceType::Add, &hack->nname, &hack->atom.back(), atype, &hack->cgnr);
 324                     if (hack->nname.empty())
 325                     {
 326                         if (!hack->oname.empty())
 327                         {
 328                             hack->nname = hack->oname;
 329                         }
 330                         else
 331                         {
 332                             gmx_fatal(FARGS,
 333                                       "Reading Termini Database '%s': don't know which name the "
 334                                       "new atom should have on line\n%s",
 335                                       fn,
 336                                       line);
 337                         }
 338                     }
 339                 }
 340             }
 341             else if (*btkw >= BondedTypes::Bonds && *btkw < BondedTypes::Count)
 342             {
 343                 /* this is bonded data: bonds, angles, dihedrals or impropers */
 344                 int n = 0;
 345                 block->rb[*btkw].b.emplace_back();
 346                 BondedInteraction* newBond = &block->rb[*btkw].b.back();
 347                 for (int j = 0; j < enumValueToNumIAtoms(*btkw); j++)
 348                 {
 349                     int ni;
 350                     if (sscanf(line + n, "%s%n", buf, &ni) == 1)
 351                     {
 352                         newBond->a[j] = buf;
 353                     }
 354                     else
 355                     {
 356                         gmx_fatal(FARGS,
 357                                   "Reading Termini Database '%s': expected %d atom names (found "
 358                                   "%d) on line\n%s",
 359                                   fn,
 360                                   enumValueToNumIAtoms(*btkw),
 361                                   j - 1,
 362                                   line);
 363                     }
 364                     n += ni;
 365                 }
 366                 strcpy(buf, "");
 367                 sscanf(line + n, "%s", buf);
 368                 newBond->s = buf;
 369             }
 370             else
 371             {
 372                 gmx_fatal(FARGS,
 373                           "Reading Termini Database: Expecting a header at line\n"
 374                           "%s",
 375                           line);
 376             }
 377         }
 378         get_a_line(in, line, STRLEN);
 379     }
 380
 381     gmx_ffclose(in);
 382 }
 383
 384 int read_ter_db(const char* ffdir, char ter, std::vector<MoleculePatchDatabase>* tbptr, PreprocessingAtomTypes* atype)
 385 {
 386     std::string ext = gmx::formatString(".%c.tdb", ter);
 387
 388     /* Search for termini database files.
 389      * Do not generate an error when none are found.
 390      */
 391     std::vector<std::string> tdbf = fflib_search_file_end(ffdir, ext.c_str(), FALSE);
 392     tbptr->clear();
 393     for (const auto& filename : tdbf)
 394     {
 395         read_ter_db_file(filename.c_str(), tbptr, atype);
 396     }
 397
 398     if (debug)
 399     {
 400         print_ter_db("new", ter, *tbptr, atype);
 401     }
 402
 403     return tbptr->size();
 404 }
 405
 406 std::vector<MoleculePatchDatabase*> filter_ter(gmx::ArrayRef<MoleculePatchDatabase> tb, const char* resname)
 407 {
 408     // TODO Four years later, no force fields have ever used this, so decide status of this feature
 409     /* Since some force fields (e.g. OPLS) needs different
 410      * atomtypes for different residues there could be a lot
 411      * of entries in the databases for specific residues
 412      * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
 413      *
 414      * To reduce the database size, we assume that a terminus specifier liker
 415      *
 416      * [ GLY|SER|ALA-NH3+ ]
 417      *
 418      * would cover all of the three residue types above.
 419      * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
 420      * pdb2gmx only uses the first 3 letters when calling this routine.
 421      *
 422      * To automate this, this routines scans a list of termini
 423      * for the residue name "resname" and returns an allocated list of
 424      * pointers to the termini that could be applied to the
 425      * residue in question. The variable pointed to by nret will
 426      * contain the number of valid pointers in the list.
 427      * Remember to free the list when you are done with it...
 428      */
 429
 430     auto                                none_idx = tb.end();
 431     std::vector<MoleculePatchDatabase*> list;
 432
 433     for (auto it = tb.begin(); it != tb.end(); it++)
 434     {
 435         const char* s     = it->name.c_str();
 436         bool        found = false;
 437         do
 438         {
 439             if (gmx::equalCaseInsensitive(resname, s, 3))
 440             {
 441                 found = true;
 442                 list.push_back(&*it);
 443             }
 444             else
 445             {
 446                 /* advance to next |-separated field */
 447                 s = strchr(s, '|');
 448                 if (s != nullptr)
 449                 {
 450                     s++;
 451                 }
 452             }
 453         } while (!found && s != nullptr);
 454     }
 455
 456     /* All residue-specific termini have been added. We might have to fall
 457      * back on generic termini, which are characterized by not having
 458      * '-' in the name prior to the last position (which indicates charge).
 459      * The [ None ] alternative is special since we don't want that
 460      * to be the default, so we put it last in the list we return.
 461      */
 462     for (auto it = tb.begin(); it != tb.end(); it++)
 463     {
 464         const char* s = it->name.c_str();
 465         if (gmx::equalCaseInsensitive("None", it->name))
 466         {
 467             none_idx = it;
 468         }
 469         else
 470         {
 471             /* Time to see if there's a generic terminus that matches.
 472                Is there a hyphen? */
 473             const char* c = strchr(s, '-');
 474
 475             /* A conjunction hyphen normally indicates a residue-specific
 476                terminus, which is named like "GLY-COOH". A generic terminus
 477                won't have a hyphen. */
 478             bool bFoundAnyHyphen = (c != nullptr);
 479             /* '-' as the last character indicates charge, so if that's
 480                the only one found e.g. "COO-", then it was not a conjunction
 481                hyphen, so this is a generic terminus */
 482             bool bOnlyFoundChargeHyphen = (bFoundAnyHyphen && *(c + 1) == '\0');
 483             /* Thus, "GLY-COO-" is not recognized as a generic terminus. */
 484             bool bFoundGenericTerminus = !bFoundAnyHyphen || bOnlyFoundChargeHyphen;
 485             if (bFoundGenericTerminus)
 486             {
 487                 /* Check that we haven't already added a residue-specific version
 488                  * of this terminus.
 489                  */
 490                 auto found = std::find_if(list.begin(), list.end(), [&s](const MoleculePatchDatabase* b) {
 491                     return strstr(b->name.c_str(), s) != nullptr;
 492                 });
 493                 if (found == list.end())
 494                 {
 495                     list.push_back(&*it);
 496                 }
 497             }
 498         }
 499     }
 500     if (none_idx != tb.end())
 501     {
 502         list.push_back(&*none_idx);
 503     }
 504
 505     return list;
 506 }
 507
 508
 509 MoleculePatchDatabase* choose_ter(gmx::ArrayRef<MoleculePatchDatabase*> tb, const char* title)
 510 {
 511     int sel, ret;
 512
 513     printf("%s\n", title);
 514     int i = 0;
 515     for (const auto& modification : tb)
 516     {
 517         bool bIsZwitterion = (0 == gmx_wcmatch("*ZWITTERION*", modification->name.c_str()));
 518         printf("%2d: %s%s\n",
 519                i,
 520                modification->name.c_str(),
 521                bIsZwitterion ? " (only use with zwitterions containing exactly one residue)" : "");
 522         i++;
 523     }
 524     do
 525     {
 526         ret = fscanf(stdin, "%d", &sel);
 527     } while ((ret != 1) || (sel < 0) || (sel >= tb.ssize()));
 528
 529     return tb[sel];
 530 }