src/gromacs/gmxpreprocess/ter_db.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
   7  * Copyright (c) 2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "ter_db.h"
  41
  42 #include <cctype>
  43 #include <cstring>
  44
  45 #include <algorithm>
  46 #include <string>
  47 #include <vector>
  48
  49 #include "gromacs/fileio/gmxfio.h"
  50 #include "gromacs/gmxpreprocess/fflibutil.h"
  51 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
  52 #include "gromacs/gmxpreprocess/grompp_impl.h"
  53 #include "gromacs/gmxpreprocess/h_db.h"
  54 #include "gromacs/gmxpreprocess/notset.h"
  55 #include "gromacs/gmxpreprocess/toputil.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/fatalerror.h"
  58 #include "gromacs/utility/futil.h"
  59 #include "gromacs/utility/smalloc.h"
  60 #include "gromacs/utility/strdb.h"
  61
  62 #include "hackblock.h"
  63 #include "resall.h"
  64
  65 /* use bonded types definitions in hackblock.h */
  66 #define ekwRepl (ebtsNR + 1)
  67 #define ekwAdd (ebtsNR + 2)
  68 #define ekwDel (ebtsNR + 3)
  69 #define ekwNR 3
  70 static const char* kw_names[ekwNR] = { "replace", "add", "delete" };
  71
  72 static int find_kw(char* keyw)
  73 {
  74     int i;
  75
  76     for (i = 0; i < ebtsNR; i++)
  77     {
  78         if (gmx_strcasecmp(btsNames[i], keyw) == 0)
  79         {
  80             return i;
  81         }
  82     }
  83     for (i = 0; i < ekwNR; i++)
  84     {
  85         if (gmx_strcasecmp(kw_names[i], keyw) == 0)
  86         {
  87             return ebtsNR + 1 + i;
  88         }
  89     }
  90
  91     return NOTSET;
  92 }
  93
  94 #define FATAL() gmx_fatal(FARGS, "Reading Termini Database: not enough items on line\n%s", line)
  95
  96 static void read_atom(char* line, bool bAdd, std::string* nname, t_atom* a, PreprocessingAtomTypes* atype, int* cgnr)
  97 {
  98     int    nr, i;
  99     char   buf[5][30];
 100     double m, q;
 101
 102     /* This code is messy, because of support for different formats:
 103      * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
 104      * for add:                <atom type> <m> <q> [cgnr]
 105      */
 106     nr = sscanf(line, "%s %s %s %s %s", buf[0], buf[1], buf[2], buf[3], buf[4]);
 107
 108     /* Here there an ambiguity due to the old replace format with cgnr,
 109      * which was read for years, but ignored in the rest of the code.
 110      * We have to assume that the atom type does not start with a digit
 111      * to make a line with 4 entries uniquely interpretable.
 112      */
 113     if (!bAdd && nr == 4 && isdigit(buf[1][0]))
 114     {
 115         nr = 3;
 116     }
 117
 118     if (nr < 3 || nr > 4)
 119     {
 120         gmx_fatal(FARGS,
 121                   "Reading Termini Database: expected %d or %d items of atom data in stead of %d "
 122                   "on line\n%s",
 123                   3, 4, nr, line);
 124     }
 125     i = 0;
 126     if (!bAdd)
 127     {
 128         if (nr == 4)
 129         {
 130             *nname = buf[i++];
 131         }
 132         else
 133         {
 134             *nname = "";
 135         }
 136     }
 137     a->type = atype->atomTypeFromName(buf[i++]);
 138     sscanf(buf[i++], "%lf", &m);
 139     a->m = m;
 140     sscanf(buf[i++], "%lf", &q);
 141     a->q = q;
 142     if (bAdd && nr == 4)
 143     {
 144         sscanf(buf[i++], "%d", cgnr);
 145     }
 146     else
 147     {
 148         *cgnr = NOTSET;
 149     }
 150 }
 151
 152 static void print_atom(FILE* out, const t_atom& a, PreprocessingAtomTypes* atype)
 153 {
 154     fprintf(out, "\t%s\t%g\t%g\n", atype->atomNameFromAtomType(a.type), a.m, a.q);
 155 }
 156
 157 static void print_ter_db(const char*                                ff,
 158                          char                                       C,
 159                          gmx::ArrayRef<const MoleculePatchDatabase> tb,
 160                          PreprocessingAtomTypes*                    atype)
 161 {
 162     std::string buf = gmx::formatString("%s-%c.tdb", ff, C);
 163     FILE*       out = gmx_fio_fopen(buf.c_str(), "w");
 164
 165     for (const auto& modification : tb)
 166     {
 167         fprintf(out, "[ %s ]\n", modification.name.c_str());
 168
 169         if (std::any_of(modification.hack.begin(), modification.hack.end(),
 170                         [](const auto& mod) { return mod.type() == MoleculePatchType::Replace; }))
 171         {
 172             fprintf(out, "[ %s ]\n", kw_names[ekwRepl - ebtsNR - 1]);
 173             for (const auto& hack : modification.hack)
 174             {
 175                 if (hack.type() == MoleculePatchType::Replace)
 176                 {
 177                     fprintf(out, "%s\t", hack.oname.c_str());
 178                     print_atom(out, hack.atom.back(), atype);
 179                 }
 180             }
 181         }
 182         if (std::any_of(modification.hack.begin(), modification.hack.end(),
 183                         [](const auto& mod) { return mod.type() == MoleculePatchType::Add; }))
 184         {
 185             fprintf(out, "[ %s ]\n", kw_names[ekwAdd - ebtsNR - 1]);
 186             for (const auto& hack : modification.hack)
 187             {
 188                 if (hack.type() == MoleculePatchType::Add)
 189                 {
 190                     print_ab(out, hack, hack.nname.c_str());
 191                     print_atom(out, hack.atom.back(), atype);
 192                 }
 193             }
 194         }
 195         if (std::any_of(modification.hack.begin(), modification.hack.end(),
 196                         [](const auto& mod) { return mod.type() == MoleculePatchType::Delete; }))
 197         {
 198             fprintf(out, "[ %s ]\n", kw_names[ekwDel - ebtsNR - 1]);
 199             for (const auto& hack : modification.hack)
 200             {
 201                 if (hack.type() == MoleculePatchType::Delete)
 202                 {
 203                     fprintf(out, "%s\n", hack.oname.c_str());
 204                 }
 205             }
 206         }
 207         for (int bt = 0; bt < ebtsNR; bt++)
 208         {
 209             if (!modification.rb[bt].b.empty())
 210             {
 211                 fprintf(out, "[ %s ]\n", btsNames[bt]);
 212                 for (const auto& b : modification.rb[bt].b)
 213                 {
 214                     for (int k = 0; k < btsNiatoms[bt]; k++)
 215                     {
 216                         fprintf(out, "%s%s", k ? "\t" : "", b.a[k].c_str());
 217                     }
 218                     if (!b.s.empty())
 219                     {
 220                         fprintf(out, "\t%s", b.s.c_str());
 221                     }
 222                     fprintf(out, "\n");
 223                 }
 224             }
 225         }
 226         fprintf(out, "\n");
 227     }
 228     gmx_fio_fclose(out);
 229 }
 230
 231 static void read_ter_db_file(const char*                         fn,
 232                              std::vector<MoleculePatchDatabase>* tbptr,
 233                              PreprocessingAtomTypes*             atype)
 234 {
 235     char filebase[STRLEN];
 236     char header[STRLEN], buf[STRLEN], line[STRLEN];
 237
 238     fflib_filename_base(fn, filebase, STRLEN);
 239     /* Remove the C/N termini extension */
 240     char* ptr = strrchr(filebase, '.');
 241     if (ptr != nullptr)
 242     {
 243         ptr[0] = '\0';
 244     }
 245
 246     FILE* in = fflib_open(fn);
 247
 248     int kwnr = NOTSET;
 249     get_a_line(in, line, STRLEN);
 250     MoleculePatchDatabase* block = nullptr;
 251     while (!feof(in))
 252     {
 253         if (get_header(line, header))
 254         {
 255             /* this is a new block, or a new keyword */
 256             kwnr = find_kw(header);
 257
 258             if (kwnr == NOTSET)
 259             {
 260                 tbptr->emplace_back(MoleculePatchDatabase());
 261                 block = &tbptr->back();
 262                 clearModificationBlock(block);
 263                 block->name     = header;
 264                 block->filebase = filebase;
 265             }
 266         }
 267         else
 268         {
 269             if (block == nullptr)
 270             {
 271                 gmx_fatal(FARGS,
 272                           "reading termini database: "
 273                           "directive expected before line:\n%s\n"
 274                           "This might be a file in an old format.",
 275                           line);
 276             }
 277             /* this is not a header, so it must be data */
 278             if (kwnr >= ebtsNR)
 279             {
 280                 /* this is a hack: add/rename/delete atoms */
 281                 /* make space for hacks */
 282                 block->hack.emplace_back(MoleculePatch());
 283                 MoleculePatch* hack = &block->hack.back();
 284
 285                 /* get data */
 286                 int n = 0;
 287                 if (kwnr == ekwRepl || kwnr == ekwDel)
 288                 {
 289                     if (sscanf(line, "%s%n", buf, &n) != 1)
 290                     {
 291                         gmx_fatal(FARGS,
 292                                   "Reading Termini Database '%s': "
 293                                   "expected atom name on line\n%s",
 294                                   fn, line);
 295                     }
 296                     hack->oname = buf;
 297                     /* we only replace or delete one atom at a time */
 298                     hack->nr = 1;
 299                 }
 300                 else if (kwnr == ekwAdd)
 301                 {
 302                     read_ab(line, fn, hack);
 303                     get_a_line(in, line, STRLEN);
 304                 }
 305                 else
 306                 {
 307                     gmx_fatal(FARGS, "unimplemented keyword number %d (%s:%d)", kwnr, __FILE__, __LINE__);
 308                 }
 309                 if (kwnr == ekwRepl || kwnr == ekwAdd)
 310                 {
 311                     hack->atom.emplace_back();
 312                     read_atom(line + n, kwnr == ekwAdd, &hack->nname, &hack->atom.back(), atype,
 313                               &hack->cgnr);
 314                     if (hack->nname.empty())
 315                     {
 316                         if (!hack->oname.empty())
 317                         {
 318                             hack->nname = hack->oname;
 319                         }
 320                         else
 321                         {
 322                             gmx_fatal(FARGS,
 323                                       "Reading Termini Database '%s': don't know which name the "
 324                                       "new atom should have on line\n%s",
 325                                       fn, line);
 326                         }
 327                     }
 328                 }
 329             }
 330             else if (kwnr >= 0 && kwnr < ebtsNR)
 331             {
 332                 /* this is bonded data: bonds, angles, dihedrals or impropers */
 333                 int n = 0;
 334                 block->rb[kwnr].b.emplace_back();
 335                 BondedInteraction* newBond = &block->rb[kwnr].b.back();
 336                 for (int j = 0; j < btsNiatoms[kwnr]; j++)
 337                 {
 338                     int ni;
 339                     if (sscanf(line + n, "%s%n", buf, &ni) == 1)
 340                     {
 341                         newBond->a[j] = buf;
 342                     }
 343                     else
 344                     {
 345                         gmx_fatal(FARGS,
 346                                   "Reading Termini Database '%s': expected %d atom names (found "
 347                                   "%d) on line\n%s",
 348                                   fn, btsNiatoms[kwnr], j - 1, line);
 349                     }
 350                     n += ni;
 351                 }
 352                 strcpy(buf, "");
 353                 sscanf(line + n, "%s", buf);
 354                 newBond->s = buf;
 355             }
 356             else
 357             {
 358                 gmx_fatal(FARGS,
 359                           "Reading Termini Database: Expecting a header at line\n"
 360                           "%s",
 361                           line);
 362             }
 363         }
 364         get_a_line(in, line, STRLEN);
 365     }
 366
 367     gmx_ffclose(in);
 368 }
 369
 370 int read_ter_db(const char* ffdir, char ter, std::vector<MoleculePatchDatabase>* tbptr, PreprocessingAtomTypes* atype)
 371 {
 372     std::string ext = gmx::formatString(".%c.tdb", ter);
 373
 374     /* Search for termini database files.
 375      * Do not generate an error when none are found.
 376      */
 377     std::vector<std::string> tdbf = fflib_search_file_end(ffdir, ext.c_str(), FALSE);
 378     tbptr->clear();
 379     for (const auto& filename : tdbf)
 380     {
 381         read_ter_db_file(filename.c_str(), tbptr, atype);
 382     }
 383
 384     if (debug)
 385     {
 386         print_ter_db("new", ter, *tbptr, atype);
 387     }
 388
 389     return tbptr->size();
 390 }
 391
 392 std::vector<MoleculePatchDatabase*> filter_ter(gmx::ArrayRef<MoleculePatchDatabase> tb, const char* resname)
 393 {
 394     // TODO Four years later, no force fields have ever used this, so decide status of this feature
 395     /* Since some force fields (e.g. OPLS) needs different
 396      * atomtypes for different residues there could be a lot
 397      * of entries in the databases for specific residues
 398      * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
 399      *
 400      * To reduce the database size, we assume that a terminus specifier liker
 401      *
 402      * [ GLY|SER|ALA-NH3+ ]
 403      *
 404      * would cover all of the three residue types above.
 405      * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
 406      * pdb2gmx only uses the first 3 letters when calling this routine.
 407      *
 408      * To automate this, this routines scans a list of termini
 409      * for the residue name "resname" and returns an allocated list of
 410      * pointers to the termini that could be applied to the
 411      * residue in question. The variable pointed to by nret will
 412      * contain the number of valid pointers in the list.
 413      * Remember to free the list when you are done with it...
 414      */
 415
 416     auto                                none_idx = tb.end();
 417     std::vector<MoleculePatchDatabase*> list;
 418
 419     for (auto it = tb.begin(); it != tb.end(); it++)
 420     {
 421         const char* s     = it->name.c_str();
 422         bool        found = false;
 423         do
 424         {
 425             if (gmx::equalCaseInsensitive(resname, s, 3))
 426             {
 427                 found = true;
 428                 list.push_back(&*it);
 429             }
 430             else
 431             {
 432                 /* advance to next |-separated field */
 433                 s = strchr(s, '|');
 434                 if (s != nullptr)
 435                 {
 436                     s++;
 437                 }
 438             }
 439         } while (!found && s != nullptr);
 440     }
 441
 442     /* All residue-specific termini have been added. We might have to fall
 443      * back on generic termini, which are characterized by not having
 444      * '-' in the name prior to the last position (which indicates charge).
 445      * The [ None ] alternative is special since we don't want that
 446      * to be the default, so we put it last in the list we return.
 447      */
 448     for (auto it = tb.begin(); it != tb.end(); it++)
 449     {
 450         const char* s = it->name.c_str();
 451         if (gmx::equalCaseInsensitive("None", it->name))
 452         {
 453             none_idx = it;
 454         }
 455         else
 456         {
 457             /* Time to see if there's a generic terminus that matches.
 458                Is there a hyphen? */
 459             const char* c = strchr(s, '-');
 460
 461             /* A conjunction hyphen normally indicates a residue-specific
 462                terminus, which is named like "GLY-COOH". A generic terminus
 463                won't have a hyphen. */
 464             bool bFoundAnyHyphen = (c != nullptr);
 465             /* '-' as the last character indicates charge, so if that's
 466                the only one found e.g. "COO-", then it was not a conjunction
 467                hyphen, so this is a generic terminus */
 468             bool bOnlyFoundChargeHyphen = (bFoundAnyHyphen && *(c + 1) == '\0');
 469             /* Thus, "GLY-COO-" is not recognized as a generic terminus. */
 470             bool bFoundGenericTerminus = !bFoundAnyHyphen || bOnlyFoundChargeHyphen;
 471             if (bFoundGenericTerminus)
 472             {
 473                 /* Check that we haven't already added a residue-specific version
 474                  * of this terminus.
 475                  */
 476                 auto found = std::find_if(list.begin(), list.end(), [&s](const MoleculePatchDatabase* b) {
 477                     return strstr(b->name.c_str(), s) != nullptr;
 478                 });
 479                 if (found == list.end())
 480                 {
 481                     list.push_back(&*it);
 482                 }
 483             }
 484         }
 485     }
 486     if (none_idx != tb.end())
 487     {
 488         list.push_back(&*none_idx);
 489     }
 490
 491     return list;
 492 }
 493
 494
 495 MoleculePatchDatabase* choose_ter(gmx::ArrayRef<MoleculePatchDatabase*> tb, const char* title)
 496 {
 497     int sel, ret;
 498
 499     printf("%s\n", title);
 500     int i = 0;
 501     for (const auto& modification : tb)
 502     {
 503         bool bIsZwitterion = (0 == gmx_wcmatch("*ZWITTERION*", modification->name.c_str()));
 504         printf("%2d: %s%s\n", i, modification->name.c_str(),
 505                bIsZwitterion ? " (only use with zwitterions containing exactly one residue)" : "");
 506         i++;
 507     }
 508     do
 509     {
 510         ret = fscanf(stdin, "%d", &sel);
 511     } while ((ret != 1) || (sel < 0) || (sel >= tb.ssize()));
 512
 513     return tb[sel];
 514 }