2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2017,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gromacs/fileio/gmxfio.h"
52 #include "gromacs/gmxpreprocess/fflibutil.h"
53 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
54 #include "gromacs/gmxpreprocess/grompp_impl.h"
55 #include "gromacs/gmxpreprocess/h_db.h"
56 #include "gromacs/gmxpreprocess/notset.h"
57 #include "gromacs/gmxpreprocess/toputil.h"
58 #include "gromacs/utility/cstringutil.h"
59 #include "gromacs/utility/enumerationhelpers.h"
60 #include "gromacs/utility/fatalerror.h"
61 #include "gromacs/utility/futil.h"
62 #include "gromacs/utility/enumerationhelpers.h"
63 #include "gromacs/utility/fatalerror.h"
64 #include "gromacs/utility/futil.h"
65 #include "gromacs/utility/smalloc.h"
66 #include "gromacs/utility/strdb.h"
67 #include "gromacs/utility/stringtoenumvalueconverter.h"
69 #include "hackblock.h"
72 /* use bonded types definitions in hackblock.h */
73 enum class ReplaceType : int
81 static const char* enumValueToString(ReplaceType enumValue)
83 constexpr gmx::EnumerationArray<ReplaceType, const char*> replaceTypeNames = { "replace",
86 return replaceTypeNames[enumValue];
89 template<typename EnumType>
90 static std::optional<EnumType> findTypeFromKeyword(char* keyw)
92 gmx::StringToEnumValueConverter<EnumType, enumValueToString, gmx::StringCompareType::CaseInsensitive, gmx::StripStrings::Yes> converter;
93 return converter.valueFrom(keyw);
96 #define FATAL() gmx_fatal(FARGS, "Reading Termini Database: not enough items on line\n%s", line)
98 static void read_atom(char* line, bool bAdd, std::string* nname, t_atom* a, PreprocessingAtomTypes* atype, int* cgnr)
104 /* This code is messy, because of support for different formats:
105 * for replace: [new name] <atom type> <m> <q> [cgnr (old format)]
106 * for add: <atom type> <m> <q> [cgnr]
108 nr = sscanf(line, "%s %s %s %s %s", buf[0], buf[1], buf[2], buf[3], buf[4]);
110 /* Here there an ambiguity due to the old replace format with cgnr,
111 * which was read for years, but ignored in the rest of the code.
112 * We have to assume that the atom type does not start with a digit
113 * to make a line with 4 entries uniquely interpretable.
115 if (!bAdd && nr == 4 && isdigit(buf[1][0]))
120 if (nr < 3 || nr > 4)
123 "Reading Termini Database: expected %d or %d items of atom data in stead of %d "
142 a->type = *atype->atomTypeFromName(buf[i++]);
143 sscanf(buf[i++], "%lf", &m);
145 sscanf(buf[i++], "%lf", &q);
149 sscanf(buf[i++], "%d", cgnr);
157 static void print_atom(FILE* out, const t_atom& a, PreprocessingAtomTypes* atype)
159 fprintf(out, "\t%s\t%g\t%g\n", *atype->atomNameFromAtomType(a.type), a.m, a.q);
162 static void print_ter_db(const char* ff,
164 gmx::ArrayRef<const MoleculePatchDatabase> tb,
165 PreprocessingAtomTypes* atype)
167 std::string buf = gmx::formatString("%s-%c.tdb", ff, C);
168 FILE* out = gmx_fio_fopen(buf.c_str(), "w");
170 for (const auto& modification : tb)
172 fprintf(out, "[ %s ]\n", modification.name.c_str());
174 if (std::any_of(modification.hack.begin(), modification.hack.end(), [](const auto& mod) {
175 return mod.type() == MoleculePatchType::Replace;
178 fprintf(out, "[ %s ]\n", enumValueToString(ReplaceType::Repl));
179 for (const auto& hack : modification.hack)
181 if (hack.type() == MoleculePatchType::Replace)
183 fprintf(out, "%s\t", hack.oname.c_str());
184 print_atom(out, hack.atom.back(), atype);
188 if (std::any_of(modification.hack.begin(), modification.hack.end(), [](const auto& mod) {
189 return mod.type() == MoleculePatchType::Add;
192 fprintf(out, "[ %s ]\n", enumValueToString(ReplaceType::Add));
193 for (const auto& hack : modification.hack)
195 if (hack.type() == MoleculePatchType::Add)
197 print_ab(out, hack, hack.nname.c_str());
198 print_atom(out, hack.atom.back(), atype);
202 if (std::any_of(modification.hack.begin(), modification.hack.end(), [](const auto& mod) {
203 return mod.type() == MoleculePatchType::Delete;
206 fprintf(out, "[ %s ]\n", enumValueToString(ReplaceType::Del));
207 for (const auto& hack : modification.hack)
209 if (hack.type() == MoleculePatchType::Delete)
211 fprintf(out, "%s\n", hack.oname.c_str());
215 for (auto bt : gmx::EnumerationWrapper<BondedTypes>{})
217 if (!modification.rb[bt].b.empty())
219 fprintf(out, "[ %s ]\n", enumValueToString(bt));
220 for (const auto& b : modification.rb[bt].b)
222 for (int k = 0; k < enumValueToNumIAtoms(bt); k++)
224 fprintf(out, "%s%s", k ? "\t" : "", b.a[k].c_str());
228 fprintf(out, "\t%s", b.s.c_str());
239 static void read_ter_db_file(const char* fn,
240 std::vector<MoleculePatchDatabase>* tbptr,
241 PreprocessingAtomTypes* atype)
243 char filebase[STRLEN];
244 char header[STRLEN], buf[STRLEN], line[STRLEN];
246 fflib_filename_base(fn, filebase, STRLEN);
247 /* Remove the C/N termini extension */
248 char* ptr = strrchr(filebase, '.');
254 FILE* in = fflib_open(fn);
256 std::optional<BondedTypes> btkw;
257 std::optional<ReplaceType> rtkw;
258 get_a_line(in, line, STRLEN);
259 MoleculePatchDatabase* block = nullptr;
262 if (get_header(line, header))
264 /* this is a new block, or a new keyword */
265 btkw = findTypeFromKeyword<BondedTypes>(header);
266 rtkw = findTypeFromKeyword<ReplaceType>(header);
268 if (!btkw.has_value() && !rtkw.has_value())
270 tbptr->emplace_back(MoleculePatchDatabase());
271 block = &tbptr->back();
272 clearModificationBlock(block);
273 block->name = header;
274 block->filebase = filebase;
279 if (block == nullptr)
282 "reading termini database: "
283 "directive expected before line:\n%s\n"
284 "This might be a file in an old format.",
287 /* this is not a header, so it must be data */
288 if (!btkw.has_value())
290 /* this is a hack: add/rename/delete atoms */
291 /* make space for hacks */
292 block->hack.emplace_back(MoleculePatch());
293 MoleculePatch* hack = &block->hack.back();
297 if (*rtkw == ReplaceType::Repl || *rtkw == ReplaceType::Del)
299 if (sscanf(line, "%s%n", buf, &n) != 1)
302 "Reading Termini Database '%s': "
303 "expected atom name on line\n%s",
308 /* we only replace or delete one atom at a time */
311 else if (*rtkw == ReplaceType::Add)
313 read_ab(line, fn, hack);
314 get_a_line(in, line, STRLEN);
319 "unimplemented keyword number %d (%s:%d)",
320 static_cast<int>(*rtkw),
324 if (*rtkw == ReplaceType::Repl || *rtkw == ReplaceType::Add)
326 hack->atom.emplace_back();
327 read_atom(line + n, *rtkw == ReplaceType::Add, &hack->nname, &hack->atom.back(), atype, &hack->cgnr);
328 if (hack->nname.empty())
330 if (!hack->oname.empty())
332 hack->nname = hack->oname;
337 "Reading Termini Database '%s': don't know which name the "
338 "new atom should have on line\n%s",
345 else if (*btkw >= BondedTypes::Bonds && *btkw < BondedTypes::Count)
347 /* this is bonded data: bonds, angles, dihedrals or impropers */
349 block->rb[*btkw].b.emplace_back();
350 BondedInteraction* newBond = &block->rb[*btkw].b.back();
351 for (int j = 0; j < enumValueToNumIAtoms(*btkw); j++)
354 if (sscanf(line + n, "%s%n", buf, &ni) == 1)
361 "Reading Termini Database '%s': expected %d atom names (found "
364 enumValueToNumIAtoms(*btkw),
371 sscanf(line + n, "%s", buf);
377 "Reading Termini Database: Expecting a header at line\n"
382 get_a_line(in, line, STRLEN);
388 int read_ter_db(const char* ffdir, char ter, std::vector<MoleculePatchDatabase>* tbptr, PreprocessingAtomTypes* atype)
390 std::string ext = gmx::formatString(".%c.tdb", ter);
392 /* Search for termini database files.
393 * Do not generate an error when none are found.
395 std::vector<std::string> tdbf = fflib_search_file_end(ffdir, ext.c_str(), FALSE);
397 for (const auto& filename : tdbf)
399 read_ter_db_file(filename.c_str(), tbptr, atype);
404 print_ter_db("new", ter, *tbptr, atype);
407 return tbptr->size();
410 std::vector<MoleculePatchDatabase*> filter_ter(gmx::ArrayRef<MoleculePatchDatabase> tb, const char* resname)
412 // TODO Four years later, no force fields have ever used this, so decide status of this feature
413 /* Since some force fields (e.g. OPLS) needs different
414 * atomtypes for different residues there could be a lot
415 * of entries in the databases for specific residues
416 * (e.g. GLY-NH3+, SER-NH3+, ALA-NH3+).
418 * To reduce the database size, we assume that a terminus specifier liker
420 * [ GLY|SER|ALA-NH3+ ]
422 * would cover all of the three residue types above.
423 * Wildcards (*,?) are not OK. Don't worry about e.g. GLU vs. GLUH since
424 * pdb2gmx only uses the first 3 letters when calling this routine.
426 * To automate this, this routines scans a list of termini
427 * for the residue name "resname" and returns an allocated list of
428 * pointers to the termini that could be applied to the
429 * residue in question. The variable pointed to by nret will
430 * contain the number of valid pointers in the list.
431 * Remember to free the list when you are done with it...
434 auto none_idx = tb.end();
435 std::vector<MoleculePatchDatabase*> list;
437 for (auto it = tb.begin(); it != tb.end(); it++)
439 const char* s = it->name.c_str();
443 if (gmx::equalCaseInsensitive(resname, s, 3))
446 list.push_back(&*it);
450 /* advance to next |-separated field */
457 } while (!found && s != nullptr);
460 /* All residue-specific termini have been added. We might have to fall
461 * back on generic termini, which are characterized by not having
462 * '-' in the name prior to the last position (which indicates charge).
463 * The [ None ] alternative is special since we don't want that
464 * to be the default, so we put it last in the list we return.
466 for (auto it = tb.begin(); it != tb.end(); it++)
468 const char* s = it->name.c_str();
469 if (gmx::equalCaseInsensitive("None", it->name))
475 /* Time to see if there's a generic terminus that matches.
476 Is there a hyphen? */
477 const char* c = strchr(s, '-');
479 /* A conjunction hyphen normally indicates a residue-specific
480 terminus, which is named like "GLY-COOH". A generic terminus
481 won't have a hyphen. */
482 bool bFoundAnyHyphen = (c != nullptr);
483 /* '-' as the last character indicates charge, so if that's
484 the only one found e.g. "COO-", then it was not a conjunction
485 hyphen, so this is a generic terminus */
486 bool bOnlyFoundChargeHyphen = (bFoundAnyHyphen && *(c + 1) == '\0');
487 /* Thus, "GLY-COO-" is not recognized as a generic terminus. */
488 bool bFoundGenericTerminus = !bFoundAnyHyphen || bOnlyFoundChargeHyphen;
489 if (bFoundGenericTerminus)
491 /* Check that we haven't already added a residue-specific version
494 auto found = std::find_if(list.begin(), list.end(), [&s](const MoleculePatchDatabase* b) {
495 return strstr(b->name.c_str(), s) != nullptr;
497 if (found == list.end())
499 list.push_back(&*it);
504 if (none_idx != tb.end())
506 list.push_back(&*none_idx);
513 MoleculePatchDatabase* choose_ter(gmx::ArrayRef<MoleculePatchDatabase*> tb, const char* title)
517 printf("%s\n", title);
519 for (const auto& modification : tb)
521 bool bIsZwitterion = (0 == gmx_wcmatch("*ZWITTERION*", modification->name.c_str()));
522 printf("%2d: %s%s\n",
524 modification->name.c_str(),
525 bIsZwitterion ? " (only use with zwitterions containing exactly one residue)" : "");
530 ret = fscanf(stdin, "%d", &sel);
531 } while ((ret != 1) || (sel < 0) || (sel >= tb.ssize()));