src/gromacs/gmxpreprocess/ter_db.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2017,2018,2019, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 #include "gmxpre.h"
  38
  39 #include "ter_db.h"
  40
  41 #include <cctype>
  42 #include <cstring>
  43
  44 #include <algorithm>
  45 #include <string>
  46 #include <vector>
  47
  48 #include "gromacs/fileio/gmxfio.h"
  49 #include "gromacs/gmxpreprocess/fflibutil.h"
  50 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
  51 #include "gromacs/gmxpreprocess/grompp_impl.h"
  52 #include "gromacs/gmxpreprocess/h_db.h"
  53 #include "gromacs/gmxpreprocess/notset.h"
  54 #include "gromacs/gmxpreprocess/toputil.h"
  55 #include "gromacs/utility/cstringutil.h"
  56 #include "gromacs/utility/fatalerror.h"
  57 #include "gromacs/utility/futil.h"
  58 #include "gromacs/utility/smalloc.h"
  59 #include "gromacs/utility/strdb.h"
  60
  61 #include "hackblock.h"
  62 #include "resall.h"
  63
  64 /* use bonded types definitions in hackblock.h */
  65 #define ekwRepl (ebtsNR + 1)
  66 #define ekwAdd (ebtsNR + 2)
  67 #define ekwDel (ebtsNR + 3)
  68 #define ekwNR 3
  69 static const char* kw_names[ekwNR] = { "replace", "add", "delete" };
  70
  71 static int find_kw(char* keyw)
  72 {
  73     int i;
  74
  75     for (i = 0; i < ebtsNR; i++)
  76     {
  77         if (gmx_strcasecmp(btsNames[i], keyw) == 0)
  78         {
  79             return i;
  80         }
  81     }
  82     for (i = 0; i < ekwNR; i++)
  83     {
  84         if (gmx_strcasecmp(kw_names[i], keyw) == 0)
  85         {
  86             return ebtsNR + 1 + i;
  87         }
  88     }
  89
  90     return NOTSET;
  91 }
  92
  93 #define FATAL() gmx_fatal(FARGS, "Reading Termini Database: not enough items on line\n%s", line)
  94
  95 static void read_atom(char* line, bool bAdd, std::string* nname, t_atom* a, PreprocessingAtomTypes* atype, int* cgnr)
  96 {
  97     int    nr, i;
  98     char   buf[5][30];
  99     double m, q;
 100
 101     /* This code is messy, because of support for different formats:
 102      * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
 103      * for add:                <atom type> <m> <q> [cgnr]
 104      */
 105     nr = sscanf(line, "%s %s %s %s %s", buf[0], buf[1], buf[2], buf[3], buf[4]);
 106
 107     /* Here there an ambiguity due to the old replace format with cgnr,
 108      * which was read for years, but ignored in the rest of the code.
 109      * We have to assume that the atom type does not start with a digit
 110      * to make a line with 4 entries uniquely interpretable.
 111      */
 112     if (!bAdd && nr == 4 && isdigit(buf[1][0]))
 113     {
 114         nr = 3;
 115     }
 116
 117     if (nr < 3 || nr > 4)
 118     {
 119         gmx_fatal(FARGS,
 120                   "Reading Termini Database: expected %d or %d items of atom data in stead of %d "
 121                   "on line\n%s",
 122                   3, 4, nr, line);
 123     }
 124     i = 0;
 125     if (!bAdd)
 126     {
 127         if (nr == 4)
 128         {
 129             *nname = buf[i++];
 130         }
 131         else
 132         {
 133             *nname = "";
 134         }
 135     }
 136     a->type = atype->atomTypeFromName(buf[i++]);
 137     sscanf(buf[i++], "%lf", &m);
 138     a->m = m;
 139     sscanf(buf[i++], "%lf", &q);
 140     a->q = q;
 141     if (bAdd && nr == 4)
 142     {
 143         sscanf(buf[i++], "%d", cgnr);
 144     }
 145     else
 146     {
 147         *cgnr = NOTSET;
 148     }
 149 }
 150
 151 static void print_atom(FILE* out, const t_atom& a, PreprocessingAtomTypes* atype)
 152 {
 153     fprintf(out, "\t%s\t%g\t%g\n", atype->atomNameFromAtomType(a.type), a.m, a.q);
 154 }
 155
 156 static void print_ter_db(const char*                                ff,
 157                          char                                       C,
 158                          gmx::ArrayRef<const MoleculePatchDatabase> tb,
 159                          PreprocessingAtomTypes*                    atype)
 160 {
 161     std::string buf = gmx::formatString("%s-%c.tdb", ff, C);
 162     FILE*       out = gmx_fio_fopen(buf.c_str(), "w");
 163
 164     for (const auto& modification : tb)
 165     {
 166         fprintf(out, "[ %s ]\n", modification.name.c_str());
 167
 168         if (std::any_of(modification.hack.begin(), modification.hack.end(),
 169                         [](const auto& mod) { return mod.type() == MoleculePatchType::Replace; }))
 170         {
 171             fprintf(out, "[ %s ]\n", kw_names[ekwRepl - ebtsNR - 1]);
 172             for (const auto& hack : modification.hack)
 173             {
 174                 if (hack.type() == MoleculePatchType::Replace)
 175                 {
 176                     fprintf(out, "%s\t", hack.oname.c_str());
 177                     print_atom(out, hack.atom.back(), atype);
 178                 }
 179             }
 180         }
 181         if (std::any_of(modification.hack.begin(), modification.hack.end(),
 182                         [](const auto& mod) { return mod.type() == MoleculePatchType::Add; }))
 183         {
 184             fprintf(out, "[ %s ]\n", kw_names[ekwAdd - ebtsNR - 1]);
 185             for (const auto& hack : modification.hack)
 186             {
 187                 if (hack.type() == MoleculePatchType::Add)
 188                 {
 189                     print_ab(out, hack, hack.nname.c_str());
 190                     print_atom(out, hack.atom.back(), atype);
 191                 }
 192             }
 193         }
 194         if (std::any_of(modification.hack.begin(), modification.hack.end(),
 195                         [](const auto& mod) { return mod.type() == MoleculePatchType::Delete; }))
 196         {
 197             fprintf(out, "[ %s ]\n", kw_names[ekwDel - ebtsNR - 1]);
 198             for (const auto& hack : modification.hack)
 199             {
 200                 if (hack.type() == MoleculePatchType::Delete)
 201                 {
 202                     fprintf(out, "%s\n", hack.oname.c_str());
 203                 }
 204             }
 205         }
 206         for (int bt = 0; bt < ebtsNR; bt++)
 207         {
 208             if (!modification.rb[bt].b.empty())
 209             {
 210                 fprintf(out, "[ %s ]\n", btsNames[bt]);
 211                 for (const auto& b : modification.rb[bt].b)
 212                 {
 213                     for (int k = 0; k < btsNiatoms[bt]; k++)
 214                     {
 215                         fprintf(out, "%s%s", k ? "\t" : "", b.a[k].c_str());
 216                     }
 217                     if (!b.s.empty())
 218                     {
 219                         fprintf(out, "\t%s", b.s.c_str());
 220                     }
 221                     fprintf(out, "\n");
 222                 }
 223             }
 224         }
 225         fprintf(out, "\n");
 226     }
 227     gmx_fio_fclose(out);
 228 }
 229
 230 static void read_ter_db_file(const char*                         fn,
 231                              std::vector<MoleculePatchDatabase>* tbptr,
 232                              PreprocessingAtomTypes*             atype)
 233 {
 234     char filebase[STRLEN];
 235     char header[STRLEN], buf[STRLEN], line[STRLEN];
 236
 237     fflib_filename_base(fn, filebase, STRLEN);
 238     /* Remove the C/N termini extension */
 239     char* ptr = strrchr(filebase, '.');
 240     if (ptr != nullptr)
 241     {
 242         ptr[0] = '\0';
 243     }
 244
 245     FILE* in = fflib_open(fn);
 246
 247     int kwnr = NOTSET;
 248     get_a_line(in, line, STRLEN);
 249     MoleculePatchDatabase* block = nullptr;
 250     while (!feof(in))
 251     {
 252         if (get_header(line, header))
 253         {
 254             /* this is a new block, or a new keyword */
 255             kwnr = find_kw(header);
 256
 257             if (kwnr == NOTSET)
 258             {
 259                 tbptr->emplace_back(MoleculePatchDatabase());
 260                 block = &tbptr->back();
 261                 clearModificationBlock(block);
 262                 block->name     = header;
 263                 block->filebase = filebase;
 264             }
 265         }
 266         else
 267         {
 268             if (block == nullptr)
 269             {
 270                 gmx_fatal(FARGS,
 271                           "reading termini database: "
 272                           "directive expected before line:\n%s\n"
 273                           "This might be a file in an old format.",
 274                           line);
 275             }
 276             /* this is not a header, so it must be data */
 277             if (kwnr >= ebtsNR)
 278             {
 279                 /* this is a hack: add/rename/delete atoms */
 280                 /* make space for hacks */
 281                 block->hack.emplace_back(MoleculePatch());
 282                 MoleculePatch* hack = &block->hack.back();
 283
 284                 /* get data */
 285                 int n = 0;
 286                 if (kwnr == ekwRepl || kwnr == ekwDel)
 287                 {
 288                     if (sscanf(line, "%s%n", buf, &n) != 1)
 289                     {
 290                         gmx_fatal(FARGS,
 291                                   "Reading Termini Database '%s': "
 292                                   "expected atom name on line\n%s",
 293                                   fn, line);
 294                     }
 295                     hack->oname = buf;
 296                     /* we only replace or delete one atom at a time */
 297                     hack->nr = 1;
 298                 }
 299                 else if (kwnr == ekwAdd)
 300                 {
 301                     read_ab(line, fn, hack);
 302                     get_a_line(in, line, STRLEN);
 303                 }
 304                 else
 305                 {
 306                     gmx_fatal(FARGS, "unimplemented keyword number %d (%s:%d)", kwnr, __FILE__, __LINE__);
 307                 }
 308                 if (kwnr == ekwRepl || kwnr == ekwAdd)
 309                 {
 310                     hack->atom.emplace_back();
 311                     read_atom(line + n, kwnr == ekwAdd, &hack->nname, &hack->atom.back(), atype,
 312                               &hack->cgnr);
 313                     if (hack->nname.empty())
 314                     {
 315                         if (!hack->oname.empty())
 316                         {
 317                             hack->nname = hack->oname;
 318                         }
 319                         else
 320                         {
 321                             gmx_fatal(FARGS,
 322                                       "Reading Termini Database '%s': don't know which name the "
 323                                       "new atom should have on line\n%s",
 324                                       fn, line);
 325                         }
 326                     }
 327                 }
 328             }
 329             else if (kwnr >= 0 && kwnr < ebtsNR)
 330             {
 331                 /* this is bonded data: bonds, angles, dihedrals or impropers */
 332                 int n = 0;
 333                 block->rb[kwnr].b.emplace_back();
 334                 BondedInteraction* newBond = &block->rb[kwnr].b.back();
 335                 for (int j = 0; j < btsNiatoms[kwnr]; j++)
 336                 {
 337                     int ni;
 338                     if (sscanf(line + n, "%s%n", buf, &ni) == 1)
 339                     {
 340                         newBond->a[j] = buf;
 341                     }
 342                     else
 343                     {
 344                         gmx_fatal(FARGS,
 345                                   "Reading Termini Database '%s': expected %d atom names (found "
 346                                   "%d) on line\n%s",
 347                                   fn, btsNiatoms[kwnr], j - 1, line);
 348                     }
 349                     n += ni;
 350                 }
 351                 strcpy(buf, "");
 352                 sscanf(line + n, "%s", buf);
 353                 newBond->s = buf;
 354             }
 355             else
 356             {
 357                 gmx_fatal(FARGS,
 358                           "Reading Termini Database: Expecting a header at line\n"
 359                           "%s",
 360                           line);
 361             }
 362         }
 363         get_a_line(in, line, STRLEN);
 364     }
 365
 366     gmx_ffclose(in);
 367 }
 368
 369 int read_ter_db(const char* ffdir, char ter, std::vector<MoleculePatchDatabase>* tbptr, PreprocessingAtomTypes* atype)
 370 {
 371     std::string ext = gmx::formatString(".%c.tdb", ter);
 372
 373     /* Search for termini database files.
 374      * Do not generate an error when none are found.
 375      */
 376     std::vector<std::string> tdbf = fflib_search_file_end(ffdir, ext.c_str(), FALSE);
 377     tbptr->clear();
 378     for (const auto& filename : tdbf)
 379     {
 380         read_ter_db_file(filename.c_str(), tbptr, atype);
 381     }
 382
 383     if (debug)
 384     {
 385         print_ter_db("new", ter, *tbptr, atype);
 386     }
 387
 388     return tbptr->size();
 389 }
 390
 391 std::vector<MoleculePatchDatabase*> filter_ter(gmx::ArrayRef<MoleculePatchDatabase> tb, const char* resname)
 392 {
 393     // TODO Four years later, no force fields have ever used this, so decide status of this feature
 394     /* Since some force fields (e.g. OPLS) needs different
 395      * atomtypes for different residues there could be a lot
 396      * of entries in the databases for specific residues
 397      * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
 398      *
 399      * To reduce the database size, we assume that a terminus specifier liker
 400      *
 401      * [ GLY|SER|ALA-NH3+ ]
 402      *
 403      * would cover all of the three residue types above.
 404      * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
 405      * pdb2gmx only uses the first 3 letters when calling this routine.
 406      *
 407      * To automate this, this routines scans a list of termini
 408      * for the residue name "resname" and returns an allocated list of
 409      * pointers to the termini that could be applied to the
 410      * residue in question. The variable pointed to by nret will
 411      * contain the number of valid pointers in the list.
 412      * Remember to free the list when you are done with it...
 413      */
 414
 415     auto                                none_idx = tb.end();
 416     std::vector<MoleculePatchDatabase*> list;
 417
 418     for (auto it = tb.begin(); it != tb.end(); it++)
 419     {
 420         const char* s     = it->name.c_str();
 421         bool        found = false;
 422         do
 423         {
 424             if (gmx::equalCaseInsensitive(resname, s, 3))
 425             {
 426                 found = true;
 427                 list.push_back(it);
 428             }
 429             else
 430             {
 431                 /* advance to next |-separated field */
 432                 s = strchr(s, '|');
 433                 if (s != nullptr)
 434                 {
 435                     s++;
 436                 }
 437             }
 438         } while (!found && s != nullptr);
 439     }
 440
 441     /* All residue-specific termini have been added. We might have to fall
 442      * back on generic termini, which are characterized by not having
 443      * '-' in the name prior to the last position (which indicates charge).
 444      * The [ None ] alternative is special since we don't want that
 445      * to be the default, so we put it last in the list we return.
 446      */
 447     for (auto it = tb.begin(); it != tb.end(); it++)
 448     {
 449         const char* s = it->name.c_str();
 450         if (gmx::equalCaseInsensitive("None", it->name))
 451         {
 452             none_idx = it;
 453         }
 454         else
 455         {
 456             /* Time to see if there's a generic terminus that matches.
 457                Is there a hyphen? */
 458             const char* c = strchr(s, '-');
 459
 460             /* A conjunction hyphen normally indicates a residue-specific
 461                terminus, which is named like "GLY-COOH". A generic terminus
 462                won't have a hyphen. */
 463             bool bFoundAnyHyphen = (c != nullptr);
 464             /* '-' as the last character indicates charge, so if that's
 465                the only one found e.g. "COO-", then it was not a conjunction
 466                hyphen, so this is a generic terminus */
 467             bool bOnlyFoundChargeHyphen = (bFoundAnyHyphen && *(c + 1) == '\0');
 468             /* Thus, "GLY-COO-" is not recognized as a generic terminus. */
 469             bool bFoundGenericTerminus = !bFoundAnyHyphen || bOnlyFoundChargeHyphen;
 470             if (bFoundGenericTerminus)
 471             {
 472                 /* Check that we haven't already added a residue-specific version
 473                  * of this terminus.
 474                  */
 475                 auto found = std::find_if(list.begin(), list.end(), [&s](const MoleculePatchDatabase* b) {
 476                     return strstr(b->name.c_str(), s) != nullptr;
 477                 });
 478                 if (found == list.end())
 479                 {
 480                     list.push_back(it);
 481                 }
 482             }
 483         }
 484     }
 485     if (none_idx != tb.end())
 486     {
 487         list.push_back(none_idx);
 488     }
 489
 490     return list;
 491 }
 492
 493
 494 MoleculePatchDatabase* choose_ter(gmx::ArrayRef<MoleculePatchDatabase*> tb, const char* title)
 495 {
 496     int sel, ret;
 497
 498     printf("%s\n", title);
 499     int i = 0;
 500     for (const auto& modification : tb)
 501     {
 502         bool bIsZwitterion = (0 == gmx_wcmatch("*ZWITTERION*", modification->name.c_str()));
 503         printf("%2d: %s%s\n", i, modification->name.c_str(),
 504                bIsZwitterion ? " (only use with zwitterions containing exactly one residue)" : "");
 505         i++;
 506     }
 507     do
 508     {
 509         ret = fscanf(stdin, "%d", &sel);
 510     } while ((ret != 1) || (sel < 0) || (sel >= tb.ssize()));
 511
 512     return tb[sel];
 513 }