2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "insert_molecules.h"
47 #include "gromacs/commandline/cmdlineoptionsmodule.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/filetypes.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxlib/conformation_utilities.h"
52 #include "gromacs/gmxpreprocess/makeexclusiondistances.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/ioptionscontainer.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/random/threefry.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/selection/nbsearch.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectioncollection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/selection/selectionoptionbehavior.h"
67 #include "gromacs/topology/atomprop.h"
68 #include "gromacs/topology/atoms.h"
69 #include "gromacs/topology/atomsbuilder.h"
70 #include "gromacs/topology/mtop_util.h"
71 #include "gromacs/topology/symtab.h"
72 #include "gromacs/topology/topology.h"
73 #include "gromacs/trajectory/trajectoryframe.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
81 /* enum for random rotations of inserted solutes */
88 const char* const cRotationEnum[] = { "xyz", "z", "none" };
90 static void center_molecule(gmx::ArrayRef<RVec> x)
97 svmul(1.0 / x.size(), center, center);
100 rvec_dec(xi, center);
104 static void generate_trial_conf(gmx::ArrayRef<RVec> xin,
106 RotationType enum_rot,
107 gmx::DefaultRandomEngine* rng,
108 std::vector<RVec>* xout)
110 gmx::UniformRealDistribution<real> dist(0, 2.0 * M_PI);
111 xout->assign(xin.begin(), xin.end());
113 real alfa = 0.0, beta = 0.0, gamma = 0.0;
125 case en_rotNone: alfa = beta = gamma = 0.; break;
127 if (enum_rot == en_rotXYZ || enum_rot == en_rotZ)
129 rotate_conf(xout->size(), as_rvec_array(xout->data()), nullptr, alfa, beta, gamma);
131 for (size_t i = 0; i < xout->size(); ++i)
133 rvec_inc((*xout)[i], offset);
137 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch* search,
138 const std::vector<real>& exclusionDistances,
139 const std::vector<RVec>& x,
140 const std::vector<real>& exclusionDistances_insrt,
141 const t_atoms& atoms,
142 const std::set<int>& removableAtoms,
143 gmx::AtomsRemover* remover)
145 gmx::AnalysisNeighborhoodPositions pos(x);
146 gmx::AnalysisNeighborhoodPairSearch pairSearch = search->startPairSearch(pos);
147 gmx::AnalysisNeighborhoodPair pair;
148 while (pairSearch.findNextPair(&pair))
150 const real r1 = exclusionDistances[pair.refIndex()];
151 const real r2 = exclusionDistances_insrt[pair.testIndex()];
152 if (pair.distance2() < gmx::square(r1 + r2))
154 if (removableAtoms.count(pair.refIndex()) == 0)
158 // TODO: If molecule information is available, this should ideally
159 // use it to remove whole molecules.
160 remover->markResidue(atoms, pair.refIndex(), true);
166 static void insert_mols(int nmol_insrt,
169 real defaultDistance,
173 std::vector<RVec>* x,
174 const std::set<int>& removableAtoms,
175 const t_atoms& atoms_insrt,
176 gmx::ArrayRef<RVec> x_insrt,
179 const std::string& posfn,
181 RotationType enum_rot)
183 fprintf(stderr, "Initialising inter-atomic distances...\n");
185 std::vector<real> exclusionDistances(makeExclusionDistances(atoms, &aps, defaultDistance, scaleFactor));
186 const std::vector<real> exclusionDistances_insrt(
187 makeExclusionDistances(&atoms_insrt, &aps, defaultDistance, scaleFactor));
189 const real maxInsertRadius =
190 *std::max_element(exclusionDistances_insrt.begin(), exclusionDistances_insrt.end());
191 real maxRadius = maxInsertRadius;
192 if (!exclusionDistances.empty())
194 const real maxExistingRadius =
195 *std::max_element(exclusionDistances.begin(), exclusionDistances.end());
196 maxRadius = std::max(maxInsertRadius, maxExistingRadius);
199 // TODO: Make all of this exception-safe.
200 gmx::AnalysisNeighborhood nb;
201 nb.setCutoff(maxInsertRadius + maxRadius);
206 seed = static_cast<int>(gmx::makeRandomSeed());
208 fprintf(stderr, "Using random seed %d\n", seed);
210 gmx::DefaultRandomEngine rng(seed);
213 set_pbc(&pbc, ePBC, box);
215 /* With -ip, take nmol_insrt from file posfn */
216 double** rpos = nullptr;
217 const bool insertAtPositions = !posfn.empty();
218 if (insertAtPositions)
221 nmol_insrt = read_xvg(posfn.c_str(), &rpos, &ncol);
224 gmx_fatal(FARGS, "Expected 3 columns (x/y/z coordinates) in file %s\n", posfn.c_str());
226 fprintf(stderr, "Read %d positions from file %s\n\n", nmol_insrt, posfn.c_str());
229 gmx::AtomsBuilder builder(atoms, symtab);
230 gmx::AtomsRemover remover(*atoms);
232 const int finalAtomCount = atoms->nr + nmol_insrt * atoms_insrt.nr;
233 const int finalResidueCount = atoms->nres + nmol_insrt * atoms_insrt.nres;
234 builder.reserve(finalAtomCount, finalResidueCount);
235 x->reserve(finalAtomCount);
236 exclusionDistances.reserve(finalAtomCount);
239 std::vector<RVec> x_n(x_insrt.size());
245 gmx::UniformRealDistribution<real> dist;
247 while (mol < nmol_insrt && trial < ntry * nmol_insrt)
250 if (!insertAtPositions)
252 // Insert at random positions.
253 offset_x[XX] = box[XX][XX] * dist(rng);
254 offset_x[YY] = box[YY][YY] * dist(rng);
255 offset_x[ZZ] = box[ZZ][ZZ] * dist(rng);
259 // Skip a position if ntry trials were not successful.
260 if (trial >= firstTrial + ntry)
262 fprintf(stderr, " skipped position (%.3f, %.3f, %.3f)\n", rpos[XX][mol],
263 rpos[YY][mol], rpos[ZZ][mol]);
269 // Insert at positions taken from option -ip file.
270 offset_x[XX] = rpos[XX][mol] + deltaR[XX] * (2 * dist(rng) - 1);
271 offset_x[YY] = rpos[YY][mol] + deltaR[YY] * (2 * dist(rng) - 1);
272 offset_x[ZZ] = rpos[ZZ][mol] + deltaR[ZZ] * (2 * dist(rng) - 1);
274 fprintf(stderr, "\rTry %d", ++trial);
277 generate_trial_conf(x_insrt, offset_x, enum_rot, &rng, &x_n);
278 gmx::AnalysisNeighborhoodPositions pos(*x);
279 gmx::AnalysisNeighborhoodSearch search = nb.initSearch(&pbc, pos);
280 if (isInsertionAllowed(&search, exclusionDistances, x_n, exclusionDistances_insrt, *atoms,
281 removableAtoms, &remover))
283 x->insert(x->end(), x_n.begin(), x_n.end());
284 exclusionDistances.insert(exclusionDistances.end(), exclusionDistances_insrt.begin(),
285 exclusionDistances_insrt.end());
286 builder.mergeAtoms(atoms_insrt);
289 fprintf(stderr, " success (now %d atoms)!\n", builder.currentAtomCount());
293 fprintf(stderr, "\n");
294 /* print number of molecules added */
295 fprintf(stderr, "Added %d molecules (out of %d requested)\n", mol - failed, nmol_insrt);
297 const int originalAtomCount = atoms->nr;
298 const int originalResidueCount = atoms->nres;
299 remover.refreshAtomCount(*atoms);
300 remover.removeMarkedElements(x);
301 remover.removeMarkedAtoms(atoms);
302 if (atoms->nr < originalAtomCount)
304 fprintf(stderr, "Replaced %d residues (%d atoms)\n", originalResidueCount - atoms->nres,
305 originalAtomCount - atoms->nr);
310 for (int i = 0; i < DIM; ++i)
324 class InsertMolecules : public ICommandLineOptionsModule, public ITopologyProvider
332 defaultDistance_(0.105),
341 // From ITopologyProvider
342 gmx_mtop_t* getTopology(bool /*required*/) override { return &top_; }
343 int getAtomCount() override { return 0; }
345 // From ICommandLineOptionsModule
346 void init(CommandLineModuleSettings* /*settings*/) override {}
347 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
348 void optionsFinished() override;
352 SelectionCollection selections_;
354 std::string inputConfFile_;
355 std::string insertConfFile_;
356 std::string positionFile_;
357 std::string outputConfFile_;
363 real defaultDistance_;
366 RotationType enumRot_;
367 Selection replaceSel_;
370 std::vector<RVec> x_;
375 void InsertMolecules::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
377 const char* const desc[] = {
378 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
379 "the [TT]-ci[tt] input file. The insertions take place either into",
380 "vacant space in the solute conformation given with [TT]-f[tt], or",
381 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
382 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
383 "around the solute before insertions. Any velocities present are",
386 "It is possible to also insert into a solvated configuration and",
387 "replace solvent atoms with the inserted atoms. To do this, use",
388 "[TT]-replace[tt] to specify a selection that identifies the atoms",
389 "that can be replaced. The tool assumes that all molecules in this",
390 "selection consist of single residues: each residue from this",
391 "selection that overlaps with the inserted molecules will be removed",
392 "instead of preventing insertion.",
394 "By default, the insertion positions are random (with initial seed",
395 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
396 "molecules have been inserted in the box. Molecules are not inserted",
397 "where the distance between any existing atom and any atom of the",
398 "inserted molecule is less than the sum based on the van der Waals",
399 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
400 "Waals radii is read by the program, and the resulting radii scaled",
401 "by [TT]-scale[tt]. If radii are not found in the database, those",
402 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
403 "Note that the usefulness of those radii depends on the atom names,",
404 "and thus varies widely with force field.",
406 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
407 "before giving up. Increase [TT]-try[tt] if you have several small",
408 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
409 "molecules are randomly oriented before insertion attempts.",
411 "Alternatively, the molecules can be inserted only at positions defined in",
412 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
413 "that give the displacements compared to the input molecule position",
414 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
415 "positions, the molecule must be centered on (0,0,0) before using",
416 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
417 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
418 "defines the maximally allowed displacements during insertial trials.",
419 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
422 settings->setHelpText(desc);
424 std::shared_ptr<SelectionOptionBehavior> selectionOptionBehavior(
425 new SelectionOptionBehavior(&selections_, this));
426 settings->addOptionsBehavior(selectionOptionBehavior);
428 // TODO: Replace use of legacyType.
429 options->addOption(FileNameOption("f")
432 .store(&inputConfFile_)
433 .defaultBasename("protein")
434 .description("Existing configuration to insert into"));
435 options->addOption(FileNameOption("ci")
439 .store(&insertConfFile_)
440 .defaultBasename("insert")
441 .description("Configuration to insert"));
442 options->addOption(FileNameOption("ip")
443 .filetype(eftGenericData)
445 .store(&positionFile_)
446 .defaultBasename("positions")
447 .description("Predefined insertion trial positions"));
448 options->addOption(FileNameOption("o")
452 .store(&outputConfFile_)
453 .defaultBasename("out")
454 .description("Output configuration after insertion"));
457 SelectionOption("replace").onlyAtoms().store(&replaceSel_).description("Atoms that can be removed if overlapping"));
458 selectionOptionBehavior->initOptions(options);
460 options->addOption(RealOption("box").vector().store(newBox_).storeIsSet(&bBox_).description(
461 "Box size (in nm)"));
462 options->addOption(IntegerOption("nmol").store(&nmolIns_).description(
463 "Number of extra molecules to insert"));
464 options->addOption(IntegerOption("try").store(&nmolTry_).description(
465 "Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
466 options->addOption(IntegerOption("seed").store(&seed_).description(
467 "Random generator seed (0 means generate)"));
469 RealOption("radius").store(&defaultDistance_).description("Default van der Waals distance"));
471 RealOption("scale").store(&scaleFactor_).description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
472 options->addOption(RealOption("dr").vector().store(deltaR_).description(
473 "Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
474 options->addOption(EnumOption<RotationType>("rot")
475 .enumValue(cRotationEnum)
477 .description("Rotate inserted molecules randomly"));
480 void InsertMolecules::optionsFinished()
482 if (nmolIns_ <= 0 && positionFile_.empty())
485 InconsistentInputError("Either -nmol must be larger than 0, "
486 "or positions must be given with -ip."));
488 if (inputConfFile_.empty() && !bBox_)
491 InconsistentInputError("When no solute (-f) is specified, "
492 "a box size (-box) must be specified."));
494 if (replaceSel_.isValid() && inputConfFile_.empty())
497 InconsistentInputError("Replacement (-replace) only makes sense "
498 "together with an existing configuration (-f)."));
501 if (!inputConfFile_.empty())
503 bool bTprFileWasRead;
504 rvec* temporaryX = nullptr;
505 fprintf(stderr, "Reading solute configuration\n");
506 readConfAndTopology(inputConfFile_.c_str(), &bTprFileWasRead, &top_, &ePBC_, &temporaryX,
508 x_.assign(temporaryX, temporaryX + top_.natoms);
510 if (top_.natoms == 0)
512 fprintf(stderr, "Note: no atoms in %s\n", inputConfFile_.c_str());
517 int InsertMolecules::run()
519 std::set<int> removableAtoms;
520 if (replaceSel_.isValid())
523 set_pbc(&pbc, ePBC_, box_);
526 fr->natoms = x_.size();
528 fr->x = as_rvec_array(x_.data());
529 selections_.evaluate(fr, &pbc);
531 removableAtoms.insert(replaceSel_.atomIndices().begin(), replaceSel_.atomIndices().end());
532 // TODO: It could be nice to check that removableAtoms contains full
533 // residues, since we anyways remove whole residues instead of
537 int ePBCForOutput = ePBC_;
540 ePBCForOutput = epbcXYZ;
542 box_[XX][XX] = newBox_[XX];
543 box_[YY][YY] = newBox_[YY];
544 box_[ZZ][ZZ] = newBox_[ZZ];
549 "Undefined solute box.\nCreate one with gmx editconf "
550 "or give explicit -box command line option");
553 gmx_mtop_t topInserted;
554 t_atoms atomsInserted;
555 std::vector<RVec> xInserted;
557 bool bTprFileWasRead;
561 readConfAndTopology(insertConfFile_.c_str(), &bTprFileWasRead, &topInserted, &ePBC_dummy,
562 &temporaryX, nullptr, box_dummy);
563 xInserted.assign(temporaryX, temporaryX + topInserted.natoms);
565 atomsInserted = gmx_mtop_global_atoms(&topInserted);
566 if (atomsInserted.nr == 0)
568 gmx_fatal(FARGS, "No molecule in %s, please check your input", insertConfFile_.c_str());
570 if (top_.name == nullptr)
572 top_.name = topInserted.name;
574 if (positionFile_.empty())
576 center_molecule(xInserted);
580 t_atoms atoms = gmx_mtop_global_atoms(&top_);
582 /* add nmol_ins molecules of atoms_ins
583 in random orientation at random place */
584 insert_mols(nmolIns_, nmolTry_, seed_, defaultDistance_, scaleFactor_, &atoms, &top_.symtab,
585 &x_, removableAtoms, atomsInserted, xInserted, ePBCForOutput, box_, positionFile_,
588 /* write new configuration to file confout */
589 fprintf(stderr, "Writing generated configuration to %s\n", outputConfFile_.c_str());
590 write_sto_conf(outputConfFile_.c_str(), *top_.name, &atoms, as_rvec_array(x_.data()), nullptr,
591 ePBCForOutput, box_);
593 /* print size of generated configuration */
594 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n", atoms.nr, atoms.nres);
597 done_atom(&atomsInserted);
605 const char* InsertMoleculesInfo::name()
607 static const char* name = "insert-molecules";
611 const char* InsertMoleculesInfo::shortDescription()
613 static const char* shortDescription = "Insert molecules into existing vacancies";
614 return shortDescription;
617 ICommandLineOptionsModulePointer InsertMoleculesInfo::create()
619 return ICommandLineOptionsModulePointer(new InsertMolecules());