2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013 by the GROMACS development team.
5 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Select.
41 * \author Teemu Murtola <teemu.murtola@gmail.com>
42 * \ingroup module_trajectoryanalysis
56 #include "gromacs/analysisdata/analysisdata.h"
57 #include "gromacs/analysisdata/dataframe.h"
58 #include "gromacs/analysisdata/datamodule.h"
59 #include "gromacs/analysisdata/modules/average.h"
60 #include "gromacs/analysisdata/modules/lifetime.h"
61 #include "gromacs/analysisdata/modules/plot.h"
62 #include "gromacs/fileio/gmxfio.h"
63 #include "gromacs/fileio/trxio.h"
64 #include "gromacs/options/basicoptions.h"
65 #include "gromacs/options/filenameoption.h"
66 #include "gromacs/options/ioptionscontainer.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectory/trajectoryframe.h"
71 #include "gromacs/trajectoryanalysis/analysissettings.h"
72 #include "gromacs/trajectoryanalysis/topologyinformation.h"
73 #include "gromacs/utility/arrayref.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/gmxassert.h"
76 #include "gromacs/utility/smalloc.h"
77 #include "gromacs/utility/stringutil.h"
78 #include "gromacs/utility/unique_cptr.h"
83 namespace analysismodules
90 * Data module for writing index files.
92 * \ingroup module_analysisdata
94 class IndexFileWriterModule : public AnalysisDataModuleSerial
97 IndexFileWriterModule();
98 ~IndexFileWriterModule() override;
100 //! Sets the file name to write the index file to.
101 void setFileName(const std::string& fnm);
103 * Adds information about a group to be printed.
105 * Must be called for each group present in the input data.
107 void addGroup(const std::string& name, bool bDynamic);
109 int flags() const override;
111 void dataStarted(AbstractAnalysisData* data) override;
112 void frameStarted(const AnalysisDataFrameHeader& header) override;
113 void pointsAdded(const AnalysisDataPointSetRef& points) override;
114 void frameFinished(const AnalysisDataFrameHeader& header) override;
115 void dataFinished() override;
122 GroupInfo(const std::string& name, bool bDynamic) : name(name), bDynamic(bDynamic) {}
129 std::vector<GroupInfo> groups_;
136 /********************************************************************
137 * IndexFileWriterModule
140 IndexFileWriterModule::IndexFileWriterModule() :
149 IndexFileWriterModule::~IndexFileWriterModule()
155 void IndexFileWriterModule::closeFile()
165 void IndexFileWriterModule::setFileName(const std::string& fnm)
171 void IndexFileWriterModule::addGroup(const std::string& name, bool bDynamic)
173 std::string newName(name);
174 std::replace(newName.begin(), newName.end(), ' ', '_');
175 groups_.emplace_back(newName, bDynamic);
179 int IndexFileWriterModule::flags() const
181 return efAllowMulticolumn | efAllowMultipoint;
185 void IndexFileWriterModule::dataStarted(AbstractAnalysisData* /*data*/)
189 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
194 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader& /*header*/)
196 bAnyWritten_ = false;
201 void IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef& points)
207 bool bFirstFrame = (points.frameIndex() == 0);
208 if (points.firstColumn() == 0)
211 GMX_RELEASE_ASSERT(currentGroup_ < ssize(groups_), "Too few groups initialized");
212 if (bFirstFrame || groups_[currentGroup_].bDynamic)
214 if (!bFirstFrame || currentGroup_ > 0)
216 std::fprintf(fp_, "\n\n");
218 std::string name = groups_[currentGroup_].name;
219 if (groups_[currentGroup_].bDynamic)
221 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
223 std::fprintf(fp_, "[ %s ]", name.c_str());
230 if (bFirstFrame || groups_[currentGroup_].bDynamic)
232 if (currentSize_ % 15 == 0)
234 std::fprintf(fp_, "\n");
236 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
243 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader& /*header*/) {}
246 void IndexFileWriterModule::dataFinished()
250 std::fprintf(fp_, "\n");
255 /********************************************************************
259 //! How to identify residues in output files.
260 enum class ResidueNumbering : int
266 //! Which atoms to write out to PDB files.
267 enum class PdbAtomsSelection : int
274 //! String values corresponding to ResidueNumbering.
275 const EnumerationArray<ResidueNumbering, const char*> c_residueNumberingTypeNames = { { "number",
277 //! String values corresponding to PdbAtomsSelection.
278 const EnumerationArray<PdbAtomsSelection, const char*> c_pdbAtomsTypeNames = {
279 { "all", "maxsel", "selected" }
282 class Select : public TrajectoryAnalysisModule
287 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
288 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
289 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
291 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
293 void finishAnalysis(int nframes) override;
294 void writeOutput() override;
301 std::string fnIndex_;
304 std::string fnOccupancy_;
306 std::string fnLifetime_;
310 bool bCumulativeLifetimes_;
311 ResidueNumbering resNumberType_;
312 PdbAtomsSelection pdbAtoms_;
314 //! The input topology.
315 const TopologyInformation* top_;
316 std::vector<int> totsize_;
321 AnalysisDataAverageModulePointer occupancyModule_;
322 AnalysisDataLifetimeModulePointer lifetimeModule_;
329 bCumulativeLifetimes_(true),
330 resNumberType_(ResidueNumbering::ByNumber),
331 pdbAtoms_(PdbAtomsSelection::All),
333 occupancyModule_(new AnalysisDataAverageModule()),
334 lifetimeModule_(new AnalysisDataLifetimeModule())
336 mdata_.addModule(occupancyModule_);
337 mdata_.addModule(lifetimeModule_);
339 registerAnalysisDataset(&sdata_, "size");
340 registerAnalysisDataset(&cdata_, "cfrac");
341 idata_.setColumnCount(0, 2);
342 idata_.setMultipoint(true);
343 registerAnalysisDataset(&idata_, "index");
344 registerAnalysisDataset(&mdata_, "mask");
345 occupancyModule_->setXAxis(1.0, 1.0);
346 registerBasicDataset(occupancyModule_.get(), "occupancy");
347 registerBasicDataset(lifetimeModule_.get(), "lifetime");
351 void Select::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
353 static const char* const desc[] = {
354 "[THISMODULE] writes out basic data about dynamic selections.",
355 "It can be used for some simple analyses, or the output can",
356 "be combined with output from other programs and/or external",
357 "analysis programs to calculate more complex things.",
358 "For detailed help on the selection syntax, please use",
359 "[TT]gmx help selections[tt].",
361 "Any combination of the output options is possible, but note",
362 "that [TT]-om[tt] only operates on the first selection.",
363 "Also note that if you provide no output options, no output is",
366 "With [TT]-os[tt], calculates the number of positions in each",
367 "selection for each frame. With [TT]-norm[tt], the output is",
368 "between 0 and 1 and describes the fraction from the maximum",
369 "number of positions (e.g., for selection 'resname RA and x < 5'",
370 "the maximum number of positions is the number of atoms in",
371 "RA residues). With [TT]-cfnorm[tt], the output is divided",
372 "by the fraction covered by the selection.",
373 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
376 "With [TT]-oc[tt], the fraction covered by each selection is",
377 "written out as a function of time.",
379 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
380 "written out as a function of time. In the output, the first",
381 "column contains the frame time, the second contains the number",
382 "of positions, followed by the atom/residue/molecule numbers.",
383 "If more than one selection is specified, the size of the second",
384 "group immediately follows the last number of the first group",
387 "With [TT]-on[tt], the selected atoms are written as a index file",
388 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
389 "is written as a selection group and for dynamic selections a",
390 "group is written for each frame.",
392 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
393 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
394 "numbers as they appear in the input file, while [TT]index[tt] prints",
395 "unique numbers assigned to the residues in the order they appear",
396 "in the input file, starting with 1. The former is more intuitive,",
397 "but if the input contains multiple residues with the same number,",
398 "the output can be less useful.",
400 "With [TT]-om[tt], a mask is printed for the first selection",
401 "as a function of time. Each line in the output corresponds to",
402 "one frame, and contains either 0/1 for each atom/residue/molecule",
403 "possibly selected. 1 stands for the atom/residue/molecule being",
404 "selected for the current frame, 0 for not selected.",
406 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
407 "the fraction of frames where the position is selected) is",
410 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
411 "column is filled with the occupancy fraction of each atom in the",
412 "selection. The coordinates in the PDB file will be those from the",
413 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
414 "appear in the output PDB file: with [TT]all[tt] all atoms are",
415 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
416 "selection are present, and with [TT]selected[tt] only atoms that are",
417 "selected at least in one frame are present.",
419 "With [TT]-olt[tt], a histogram is produced that shows the number of",
420 "selected positions as a function of the time the position was",
421 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
422 "subintervals of longer intervals are included in the histogram.[PAR]",
423 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
424 "dynamic selections.",
426 "To plot coordinates for selections, use [gmx-trajectory]."
429 settings->setHelpText(desc);
431 options->addOption(FileNameOption("os")
435 .defaultBasename("size")
436 .description("Number of positions in each selection"));
437 options->addOption(FileNameOption("oc")
441 .defaultBasename("cfrac")
442 .description("Covered fraction for each selection"));
443 options->addOption(FileNameOption("oi")
444 .filetype(eftGenericData)
447 .defaultBasename("index")
448 .description("Indices selected by each selection"));
449 options->addOption(FileNameOption("on")
453 .defaultBasename("index")
454 .description("Index file from the selection"));
455 options->addOption(FileNameOption("om")
459 .defaultBasename("mask")
460 .description("Mask for selected positions"));
461 options->addOption(FileNameOption("of")
464 .store(&fnOccupancy_)
465 .defaultBasename("occupancy")
466 .description("Occupied fraction for selected positions"));
468 FileNameOption("ofpdb")
472 .defaultBasename("occupancy")
473 .description("PDB file with occupied fraction for selected positions"));
474 options->addOption(FileNameOption("olt")
478 .defaultBasename("lifetime")
479 .description("Lifetime histogram"));
481 options->addOption(SelectionOption("select").storeVector(&sel_).required().multiValue().description(
482 "Selections to analyze"));
485 BooleanOption("norm").store(&bTotNorm_).description("Normalize by total number of positions with -os"));
487 BooleanOption("cfnorm").store(&bFracNorm_).description("Normalize by covered fraction with -os"));
488 options->addOption(EnumOption<ResidueNumbering>("resnr")
489 .store(&resNumberType_)
490 .enumValue(c_residueNumberingTypeNames)
491 .description("Residue number output type with -oi and -on"));
492 options->addOption(EnumOption<PdbAtomsSelection>("pdbatoms")
494 .enumValue(c_pdbAtomsTypeNames)
495 .description("Atoms to write with -ofpdb"));
496 options->addOption(BooleanOption("cumlt")
497 .store(&bCumulativeLifetimes_)
498 .description("Cumulate subintervals of longer intervals in -olt"));
501 void Select::optionsFinished(TrajectoryAnalysisSettings* settings)
505 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
506 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
510 void Select::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
512 bResInd_ = (resNumberType_ == ResidueNumbering::ByIndex);
514 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
516 i->initCoveredFraction(CFRAC_SOLIDANGLE);
519 // TODO: For large systems, a float may not have enough precision
520 sdata_.setColumnCount(0, sel_.size());
521 totsize_.reserve(sel_.size());
522 for (size_t g = 0; g < sel_.size(); ++g)
524 totsize_.push_back(sel_[g].posCount());
526 if (!fnSize_.empty())
528 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
529 plot->setFileName(fnSize_);
530 plot->setTitle("Selection size");
531 plot->setXAxisIsTime();
532 plot->setYLabel("Number");
533 for (size_t g = 0; g < sel_.size(); ++g)
535 plot->appendLegend(sel_[g].name());
537 sdata_.addModule(plot);
540 cdata_.setColumnCount(0, sel_.size());
541 if (!fnFrac_.empty())
543 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
544 plot->setFileName(fnFrac_);
545 plot->setTitle("Covered fraction");
546 plot->setXAxisIsTime();
547 plot->setYLabel("Fraction");
548 plot->setYFormat(6, 4);
549 for (size_t g = 0; g < sel_.size(); ++g)
551 plot->appendLegend(sel_[g].name());
553 cdata_.addModule(plot);
556 // TODO: For large systems, a float may not have enough precision
557 if (!fnIndex_.empty())
559 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
560 plot->setFileName(fnIndex_);
561 plot->setPlainOutput(true);
562 plot->setYFormat(4, 0);
563 idata_.addModule(plot);
567 std::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
568 writer->setFileName(fnNdx_);
569 for (size_t g = 0; g < sel_.size(); ++g)
571 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
573 idata_.addModule(writer);
576 mdata_.setDataSetCount(sel_.size());
577 for (size_t g = 0; g < sel_.size(); ++g)
579 mdata_.setColumnCount(g, sel_[g].posCount());
581 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
582 if (!fnMask_.empty())
584 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
585 plot->setFileName(fnMask_);
586 plot->setTitle("Selection mask");
587 plot->setXAxisIsTime();
588 plot->setYLabel("Occupancy");
589 plot->setYFormat(1, 0);
590 // TODO: Add legend? (there can be massive amount of columns)
591 mdata_.addModule(plot);
593 if (!fnOccupancy_.empty())
595 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
596 plot->setFileName(fnOccupancy_);
597 plot->setTitle("Fraction of time selection matches");
598 plot->setXLabel("Selected position");
599 plot->setYLabel("Occupied fraction");
600 for (size_t g = 0; g < sel_.size(); ++g)
602 plot->appendLegend(sel_[g].name());
604 occupancyModule_->addModule(plot);
606 if (!fnLifetime_.empty())
608 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
609 plot->setFileName(fnLifetime_);
610 plot->setTitle("Lifetime histogram");
611 plot->setXAxisIsTime();
612 plot->setYLabel("Number of occurrences");
613 for (size_t g = 0; g < sel_.size(); ++g)
615 plot->appendLegend(sel_[g].name());
617 lifetimeModule_->addModule(plot);
624 void Select::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* /* pbc */, TrajectoryAnalysisModuleData* pdata)
626 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
627 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
628 AnalysisDataHandle idh = pdata->dataHandle(idata_);
629 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
630 const SelectionList& sel = TrajectoryAnalysisModuleData::parallelSelections(sel_);
632 sdh.startFrame(frnr, fr.time);
633 for (size_t g = 0; g < sel.size(); ++g)
635 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
638 normfac /= totsize_[g];
640 sdh.setPoint(g, sel[g].posCount() * normfac);
644 cdh.startFrame(frnr, fr.time);
645 for (size_t g = 0; g < sel.size(); ++g)
647 cdh.setPoint(g, sel[g].coveredFraction());
651 idh.startFrame(frnr, fr.time);
652 for (size_t g = 0; g < sel.size(); ++g)
654 idh.setPoint(0, sel[g].posCount());
655 idh.finishPointSet();
656 for (int i = 0; i < sel[g].posCount(); ++i)
658 const SelectionPosition& p = sel[g].position(i);
659 if (sel[g].type() == INDEX_RES && !bResInd_)
661 idh.setPoint(1, top_->atoms()->resinfo[p.mappedId()].nr);
665 idh.setPoint(1, p.mappedId() + 1);
667 idh.finishPointSet();
672 mdh.startFrame(frnr, fr.time);
673 for (size_t g = 0; g < sel.size(); ++g)
675 mdh.selectDataSet(g);
676 for (int i = 0; i < totsize_[g]; ++i)
680 for (int i = 0; i < sel[g].posCount(); ++i)
682 mdh.setPoint(sel[g].position(i).refId(), 1);
689 void Select::finishAnalysis(int /*nframes*/) {}
692 void Select::writeOutput()
696 GMX_RELEASE_ASSERT(top_->hasTopology(),
697 "Topology should have been loaded or an error given earlier");
698 auto atoms = top_->copyAtoms();
699 if (!atoms->havePdbInfo)
701 snew(atoms->pdbinfo, atoms->nr);
702 atoms->havePdbInfo = TRUE;
704 for (int i = 0; i < atoms->nr; ++i)
706 atoms->pdbinfo[i].occup = 0.0;
708 for (size_t g = 0; g < sel_.size(); ++g)
710 for (int i = 0; i < sel_[g].posCount(); ++i)
712 ArrayRef<const int> atomIndices = sel_[g].position(i).atomIndices();
713 ArrayRef<const int>::const_iterator ai;
714 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
716 atoms->pdbinfo[*ai].occup += occupancyModule_->average(g, i);
721 std::vector<RVec> x = copyOf(top_->x());
723 clear_trxframe(&fr, TRUE);
725 fr.atoms = atoms.get();
728 fr.x = as_rvec_array(x.data());
729 top_->getBox(fr.box);
733 case PdbAtomsSelection::All:
735 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
736 write_trxframe(status, &fr, nullptr);
740 case PdbAtomsSelection::MaxSelection:
742 std::set<int> atomIndicesSet;
743 for (size_t g = 0; g < sel_.size(); ++g)
745 ArrayRef<const int> atomIndices = sel_[g].atomIndices();
746 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
748 std::vector<int> allAtomIndices(atomIndicesSet.begin(), atomIndicesSet.end());
749 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
750 write_trxframe_indexed(status, &fr, allAtomIndices.size(), allAtomIndices.data(), nullptr);
754 case PdbAtomsSelection::Selected:
756 std::vector<int> indices;
757 for (int i = 0; i < atoms->nr; ++i)
759 if (atoms->pdbinfo[i].occup > 0.0)
761 indices.push_back(i);
764 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
765 write_trxframe_indexed(status, &fr, indices.size(), indices.data(), nullptr);
769 case PdbAtomsSelection::Count:
770 GMX_RELEASE_ASSERT(false,
771 "Mismatch between -pdbatoms enum values and implementation");
778 const char SelectInfo::name[] = "select";
779 const char SelectInfo::shortDescription[] = "Print general information about selections";
781 TrajectoryAnalysisModulePointer SelectInfo::create()
783 return TrajectoryAnalysisModulePointer(new Select);
786 } // namespace analysismodules