2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 #include "insert_molecules.h"
48 #include "gromacs/commandline/cmdlineoptionsmodule.h"
49 #include "gromacs/fileio/confio.h"
50 #include "gromacs/fileio/filetypes.h"
51 #include "gromacs/fileio/xvgr.h"
52 #include "gromacs/gmxlib/conformation_utilities.h"
53 #include "gromacs/gmxpreprocess/makeexclusiondistances.h"
54 #include "gromacs/math/functions.h"
55 #include "gromacs/math/utilities.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/options/basicoptions.h"
58 #include "gromacs/options/filenameoption.h"
59 #include "gromacs/options/ioptionscontainer.h"
60 #include "gromacs/pbcutil/pbc.h"
61 #include "gromacs/random/threefry.h"
62 #include "gromacs/random/uniformrealdistribution.h"
63 #include "gromacs/selection/nbsearch.h"
64 #include "gromacs/selection/selection.h"
65 #include "gromacs/selection/selectioncollection.h"
66 #include "gromacs/selection/selectionoption.h"
67 #include "gromacs/selection/selectionoptionbehavior.h"
68 #include "gromacs/topology/atomprop.h"
69 #include "gromacs/topology/atoms.h"
70 #include "gromacs/topology/atomsbuilder.h"
71 #include "gromacs/topology/mtop_util.h"
72 #include "gromacs/topology/symtab.h"
73 #include "gromacs/topology/topology.h"
74 #include "gromacs/trajectory/trajectoryframe.h"
75 #include "gromacs/utility/cstringutil.h"
76 #include "gromacs/utility/enumerationhelpers.h"
77 #include "gromacs/utility/exceptions.h"
78 #include "gromacs/utility/fatalerror.h"
79 #include "gromacs/utility/smalloc.h"
83 /* enum for random rotations of inserted solutes */
84 enum class RotationType : int
91 static const gmx::EnumerationArray<RotationType, const char*> c_rotationTypeNames = { { "xyz", "z",
94 static void center_molecule(gmx::ArrayRef<RVec> x)
101 svmul(1.0 / x.size(), center, center);
104 rvec_dec(xi, center);
108 static void generate_trial_conf(gmx::ArrayRef<RVec> xin,
110 RotationType enum_rot,
111 gmx::DefaultRandomEngine* rng,
112 std::vector<RVec>* xout)
114 gmx::UniformRealDistribution<real> dist(0, 2.0 * M_PI);
115 xout->assign(xin.begin(), xin.end());
117 real alfa = 0.0, beta = 0.0, gamma = 0.0;
120 case RotationType::XYZ:
125 case RotationType::Z:
129 case RotationType::None: alfa = beta = gamma = 0.; break;
130 default: GMX_THROW(gmx::InternalError("Invalid RotationType"));
132 if (enum_rot == RotationType::XYZ || enum_rot == RotationType::Z)
134 rotate_conf(xout->size(), as_rvec_array(xout->data()), nullptr, alfa, beta, gamma);
136 for (size_t i = 0; i < xout->size(); ++i)
138 rvec_inc((*xout)[i], offset);
142 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch* search,
143 const std::vector<real>& exclusionDistances,
144 const std::vector<RVec>& x,
145 const std::vector<real>& exclusionDistances_insrt,
146 const t_atoms& atoms,
147 const std::set<int>& removableAtoms,
148 gmx::AtomsRemover* remover)
150 gmx::AnalysisNeighborhoodPositions pos(x);
151 gmx::AnalysisNeighborhoodPairSearch pairSearch = search->startPairSearch(pos);
152 gmx::AnalysisNeighborhoodPair pair;
153 while (pairSearch.findNextPair(&pair))
155 const real r1 = exclusionDistances[pair.refIndex()];
156 const real r2 = exclusionDistances_insrt[pair.testIndex()];
157 if (pair.distance2() < gmx::square(r1 + r2))
159 if (removableAtoms.count(pair.refIndex()) == 0)
163 // TODO: If molecule information is available, this should ideally
164 // use it to remove whole molecules.
165 remover->markResidue(atoms, pair.refIndex(), true);
171 static void insert_mols(int nmol_insrt,
174 real defaultDistance,
178 std::vector<RVec>* x,
179 const std::set<int>& removableAtoms,
180 const t_atoms& atoms_insrt,
181 gmx::ArrayRef<RVec> x_insrt,
184 const std::string& posfn,
186 RotationType enum_rot)
188 fprintf(stderr, "Initialising inter-atomic distances...\n");
190 std::vector<real> exclusionDistances(makeExclusionDistances(atoms, &aps, defaultDistance, scaleFactor));
191 const std::vector<real> exclusionDistances_insrt(
192 makeExclusionDistances(&atoms_insrt, &aps, defaultDistance, scaleFactor));
194 const real maxInsertRadius =
195 *std::max_element(exclusionDistances_insrt.begin(), exclusionDistances_insrt.end());
196 real maxRadius = maxInsertRadius;
197 if (!exclusionDistances.empty())
199 const real maxExistingRadius =
200 *std::max_element(exclusionDistances.begin(), exclusionDistances.end());
201 maxRadius = std::max(maxInsertRadius, maxExistingRadius);
204 // TODO: Make all of this exception-safe.
205 gmx::AnalysisNeighborhood nb;
206 nb.setCutoff(maxInsertRadius + maxRadius);
211 seed = static_cast<int>(gmx::makeRandomSeed());
213 fprintf(stderr, "Using random seed %d\n", seed);
215 gmx::DefaultRandomEngine rng(seed);
218 set_pbc(&pbc, pbcType, box);
220 /* With -ip, take nmol_insrt from file posfn */
221 double** rpos = nullptr;
222 const bool insertAtPositions = !posfn.empty();
223 if (insertAtPositions)
226 nmol_insrt = read_xvg(posfn.c_str(), &rpos, &ncol);
229 gmx_fatal(FARGS, "Expected 3 columns (x/y/z coordinates) in file %s\n", posfn.c_str());
231 fprintf(stderr, "Read %d positions from file %s\n\n", nmol_insrt, posfn.c_str());
234 gmx::AtomsBuilder builder(atoms, symtab);
235 gmx::AtomsRemover remover(*atoms);
237 const int finalAtomCount = atoms->nr + nmol_insrt * atoms_insrt.nr;
238 const int finalResidueCount = atoms->nres + nmol_insrt * atoms_insrt.nres;
239 builder.reserve(finalAtomCount, finalResidueCount);
240 x->reserve(finalAtomCount);
241 exclusionDistances.reserve(finalAtomCount);
244 std::vector<RVec> x_n(x_insrt.size());
250 gmx::UniformRealDistribution<real> dist;
252 while (mol < nmol_insrt && trial < ntry * nmol_insrt)
255 if (!insertAtPositions)
257 // Insert at random positions.
258 offset_x[XX] = box[XX][XX] * dist(rng);
259 offset_x[YY] = box[YY][YY] * dist(rng);
260 offset_x[ZZ] = box[ZZ][ZZ] * dist(rng);
264 // Skip a position if ntry trials were not successful.
265 if (trial >= firstTrial + ntry)
267 fprintf(stderr, " skipped position (%.3f, %.3f, %.3f)\n", rpos[XX][mol],
268 rpos[YY][mol], rpos[ZZ][mol]);
274 // Insert at positions taken from option -ip file.
275 offset_x[XX] = rpos[XX][mol] + deltaR[XX] * (2 * dist(rng) - 1);
276 offset_x[YY] = rpos[YY][mol] + deltaR[YY] * (2 * dist(rng) - 1);
277 offset_x[ZZ] = rpos[ZZ][mol] + deltaR[ZZ] * (2 * dist(rng) - 1);
279 fprintf(stderr, "\rTry %d", ++trial);
282 generate_trial_conf(x_insrt, offset_x, enum_rot, &rng, &x_n);
283 gmx::AnalysisNeighborhoodPositions pos(*x);
284 gmx::AnalysisNeighborhoodSearch search = nb.initSearch(&pbc, pos);
285 if (isInsertionAllowed(&search, exclusionDistances, x_n, exclusionDistances_insrt, *atoms,
286 removableAtoms, &remover))
288 x->insert(x->end(), x_n.begin(), x_n.end());
289 exclusionDistances.insert(exclusionDistances.end(), exclusionDistances_insrt.begin(),
290 exclusionDistances_insrt.end());
291 builder.mergeAtoms(atoms_insrt);
294 fprintf(stderr, " success (now %d atoms)!\n", builder.currentAtomCount());
298 fprintf(stderr, "\n");
299 /* print number of molecules added */
300 fprintf(stderr, "Added %d molecules (out of %d requested)\n", mol - failed, nmol_insrt);
302 const int originalAtomCount = atoms->nr;
303 const int originalResidueCount = atoms->nres;
304 remover.refreshAtomCount(*atoms);
305 remover.removeMarkedElements(x);
306 remover.removeMarkedAtoms(atoms);
307 if (atoms->nr < originalAtomCount)
309 fprintf(stderr, "Replaced %d residues (%d atoms)\n", originalResidueCount - atoms->nres,
310 originalAtomCount - atoms->nr);
315 for (int i = 0; i < DIM; ++i)
329 class InsertMolecules : public ICommandLineOptionsModule, public ITopologyProvider
337 defaultDistance_(0.105),
339 enumRot_(RotationType::XYZ)
346 // From ITopologyProvider
347 gmx_mtop_t* getTopology(bool /*required*/) override { return &top_; }
348 int getAtomCount() override { return 0; }
350 // From ICommandLineOptionsModule
351 void init(CommandLineModuleSettings* /*settings*/) override {}
352 void initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings) override;
353 void optionsFinished() override;
357 SelectionCollection selections_;
359 std::string inputConfFile_;
360 std::string insertConfFile_;
361 std::string positionFile_;
362 std::string outputConfFile_;
368 real defaultDistance_;
371 RotationType enumRot_;
372 Selection replaceSel_;
375 std::vector<RVec> x_;
380 void InsertMolecules::initOptions(IOptionsContainer* options, ICommandLineOptionsModuleSettings* settings)
382 const char* const desc[] = {
383 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
384 "the [TT]-ci[tt] input file. The insertions take place either into",
385 "vacant space in the solute conformation given with [TT]-f[tt], or",
386 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
387 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
388 "around the solute before insertions. Any velocities present are",
391 "It is possible to also insert into a solvated configuration and",
392 "replace solvent atoms with the inserted atoms. To do this, use",
393 "[TT]-replace[tt] to specify a selection that identifies the atoms",
394 "that can be replaced. The tool assumes that all molecules in this",
395 "selection consist of single residues: each residue from this",
396 "selection that overlaps with the inserted molecules will be removed",
397 "instead of preventing insertion.",
399 "By default, the insertion positions are random (with initial seed",
400 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
401 "molecules have been inserted in the box. Molecules are not inserted",
402 "where the distance between any existing atom and any atom of the",
403 "inserted molecule is less than the sum based on the van der Waals",
404 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
405 "Waals radii is read by the program, and the resulting radii scaled",
406 "by [TT]-scale[tt]. If radii are not found in the database, those",
407 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
408 "Note that the usefulness of those radii depends on the atom names,",
409 "and thus varies widely with force field.",
411 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
412 "before giving up. Increase [TT]-try[tt] if you have several small",
413 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
414 "molecules are randomly oriented before insertion attempts.",
416 "Alternatively, the molecules can be inserted only at positions defined in",
417 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
418 "that give the displacements compared to the input molecule position",
419 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
420 "positions, the molecule must be centered on (0,0,0) before using",
421 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
422 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
423 "defines the maximally allowed displacements during insertial trials.",
424 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
427 settings->setHelpText(desc);
429 std::shared_ptr<SelectionOptionBehavior> selectionOptionBehavior(
430 new SelectionOptionBehavior(&selections_, this));
431 settings->addOptionsBehavior(selectionOptionBehavior);
433 // TODO: Replace use of legacyType.
434 options->addOption(FileNameOption("f")
437 .store(&inputConfFile_)
438 .defaultBasename("protein")
439 .description("Existing configuration to insert into"));
440 options->addOption(FileNameOption("ci")
444 .store(&insertConfFile_)
445 .defaultBasename("insert")
446 .description("Configuration to insert"));
447 options->addOption(FileNameOption("ip")
448 .filetype(eftGenericData)
450 .store(&positionFile_)
451 .defaultBasename("positions")
452 .description("Predefined insertion trial positions"));
453 options->addOption(FileNameOption("o")
457 .store(&outputConfFile_)
458 .defaultBasename("out")
459 .description("Output configuration after insertion"));
462 SelectionOption("replace").onlyAtoms().store(&replaceSel_).description("Atoms that can be removed if overlapping"));
463 selectionOptionBehavior->initOptions(options);
465 options->addOption(RealOption("box").vector().store(newBox_).storeIsSet(&bBox_).description(
466 "Box size (in nm)"));
467 options->addOption(IntegerOption("nmol").store(&nmolIns_).description(
468 "Number of extra molecules to insert"));
469 options->addOption(IntegerOption("try").store(&nmolTry_).description(
470 "Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
471 options->addOption(IntegerOption("seed").store(&seed_).description(
472 "Random generator seed (0 means generate)"));
474 RealOption("radius").store(&defaultDistance_).description("Default van der Waals distance"));
476 RealOption("scale").store(&scaleFactor_).description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
477 options->addOption(RealOption("dr").vector().store(deltaR_).description(
478 "Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
479 options->addOption(EnumOption<RotationType>("rot")
480 .enumValue(c_rotationTypeNames)
482 .description("Rotate inserted molecules randomly"));
485 void InsertMolecules::optionsFinished()
487 if (nmolIns_ <= 0 && positionFile_.empty())
490 InconsistentInputError("Either -nmol must be larger than 0, "
491 "or positions must be given with -ip."));
493 if (inputConfFile_.empty() && !bBox_)
496 InconsistentInputError("When no solute (-f) is specified, "
497 "a box size (-box) must be specified."));
499 if (replaceSel_.isValid() && inputConfFile_.empty())
502 InconsistentInputError("Replacement (-replace) only makes sense "
503 "together with an existing configuration (-f)."));
506 if (!inputConfFile_.empty())
508 bool bTprFileWasRead;
509 rvec* temporaryX = nullptr;
510 fprintf(stderr, "Reading solute configuration\n");
511 readConfAndTopology(inputConfFile_.c_str(), &bTprFileWasRead, &top_, &pbcType_, &temporaryX,
513 x_.assign(temporaryX, temporaryX + top_.natoms);
515 if (top_.natoms == 0)
517 fprintf(stderr, "Note: no atoms in %s\n", inputConfFile_.c_str());
522 int InsertMolecules::run()
524 std::set<int> removableAtoms;
525 if (replaceSel_.isValid())
528 set_pbc(&pbc, pbcType_, box_);
531 fr->natoms = x_.size();
533 fr->x = as_rvec_array(x_.data());
534 selections_.evaluate(fr, &pbc);
536 removableAtoms.insert(replaceSel_.atomIndices().begin(), replaceSel_.atomIndices().end());
537 // TODO: It could be nice to check that removableAtoms contains full
538 // residues, since we anyways remove whole residues instead of
542 PbcType pbcTypeForOutput = pbcType_;
545 pbcTypeForOutput = PbcType::Xyz;
547 box_[XX][XX] = newBox_[XX];
548 box_[YY][YY] = newBox_[YY];
549 box_[ZZ][ZZ] = newBox_[ZZ];
554 "Undefined solute box.\nCreate one with gmx editconf "
555 "or give explicit -box command line option");
558 gmx_mtop_t topInserted;
559 t_atoms atomsInserted;
560 std::vector<RVec> xInserted;
562 bool bTprFileWasRead;
563 PbcType pbcType_dummy;
566 readConfAndTopology(insertConfFile_.c_str(), &bTprFileWasRead, &topInserted, &pbcType_dummy,
567 &temporaryX, nullptr, box_dummy);
568 xInserted.assign(temporaryX, temporaryX + topInserted.natoms);
570 atomsInserted = gmx_mtop_global_atoms(&topInserted);
571 if (atomsInserted.nr == 0)
573 gmx_fatal(FARGS, "No molecule in %s, please check your input", insertConfFile_.c_str());
575 if (top_.name == nullptr)
577 top_.name = topInserted.name;
579 if (positionFile_.empty())
581 center_molecule(xInserted);
585 t_atoms atoms = gmx_mtop_global_atoms(&top_);
587 /* add nmol_ins molecules of atoms_ins
588 in random orientation at random place */
589 insert_mols(nmolIns_, nmolTry_, seed_, defaultDistance_, scaleFactor_, &atoms, &top_.symtab,
590 &x_, removableAtoms, atomsInserted, xInserted, pbcTypeForOutput, box_,
591 positionFile_, deltaR_, enumRot_);
593 /* write new configuration to file confout */
594 fprintf(stderr, "Writing generated configuration to %s\n", outputConfFile_.c_str());
595 write_sto_conf(outputConfFile_.c_str(), *top_.name, &atoms, as_rvec_array(x_.data()), nullptr,
596 pbcTypeForOutput, box_);
598 /* print size of generated configuration */
599 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n", atoms.nr, atoms.nres);
602 done_atom(&atomsInserted);
610 const char* InsertMoleculesInfo::name()
612 static const char* name = "insert-molecules";
616 const char* InsertMoleculesInfo::shortDescription()
618 static const char* shortDescription = "Insert molecules into existing vacancies";
619 return shortDescription;
622 ICommandLineOptionsModulePointer InsertMoleculesInfo::create()
624 return ICommandLineOptionsModulePointer(new InsertMolecules());