2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013 by the GROMACS development team.
5 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Select.
41 * \author Teemu Murtola <teemu.murtola@gmail.com>
42 * \ingroup module_trajectoryanalysis
56 #include "gromacs/analysisdata/analysisdata.h"
57 #include "gromacs/analysisdata/dataframe.h"
58 #include "gromacs/analysisdata/datamodule.h"
59 #include "gromacs/analysisdata/modules/average.h"
60 #include "gromacs/analysisdata/modules/lifetime.h"
61 #include "gromacs/analysisdata/modules/plot.h"
62 #include "gromacs/fileio/gmxfio.h"
63 #include "gromacs/fileio/trxio.h"
64 #include "gromacs/options/basicoptions.h"
65 #include "gromacs/options/filenameoption.h"
66 #include "gromacs/options/ioptionscontainer.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectory/trajectoryframe.h"
71 #include "gromacs/trajectoryanalysis/analysissettings.h"
72 #include "gromacs/trajectoryanalysis/topologyinformation.h"
73 #include "gromacs/utility/arrayref.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/gmxassert.h"
76 #include "gromacs/utility/smalloc.h"
77 #include "gromacs/utility/stringutil.h"
78 #include "gromacs/utility/unique_cptr.h"
83 namespace analysismodules
90 * Data module for writing index files.
92 * \ingroup module_analysisdata
94 class IndexFileWriterModule : public AnalysisDataModuleSerial
97 IndexFileWriterModule();
98 ~IndexFileWriterModule() override;
100 //! Sets the file name to write the index file to.
101 void setFileName(const std::string& fnm);
103 * Adds information about a group to be printed.
105 * Must be called for each group present in the input data.
107 void addGroup(const std::string& name, bool bDynamic);
109 int flags() const override;
111 void dataStarted(AbstractAnalysisData* data) override;
112 void frameStarted(const AnalysisDataFrameHeader& header) override;
113 void pointsAdded(const AnalysisDataPointSetRef& points) override;
114 void frameFinished(const AnalysisDataFrameHeader& header) override;
115 void dataFinished() override;
122 GroupInfo(const std::string& name, bool bDynamic) : name(name), bDynamic(bDynamic) {}
129 std::vector<GroupInfo> groups_;
136 /********************************************************************
137 * IndexFileWriterModule
140 IndexFileWriterModule::IndexFileWriterModule() :
149 IndexFileWriterModule::~IndexFileWriterModule()
155 void IndexFileWriterModule::closeFile()
165 void IndexFileWriterModule::setFileName(const std::string& fnm)
171 void IndexFileWriterModule::addGroup(const std::string& name, bool bDynamic)
173 std::string newName(name);
174 std::replace(newName.begin(), newName.end(), ' ', '_');
175 groups_.emplace_back(newName, bDynamic);
179 int IndexFileWriterModule::flags() const
181 return efAllowMulticolumn | efAllowMultipoint;
185 void IndexFileWriterModule::dataStarted(AbstractAnalysisData* /*data*/)
189 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
194 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader& /*header*/)
196 bAnyWritten_ = false;
201 void IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef& points)
207 bool bFirstFrame = (points.frameIndex() == 0);
208 if (points.firstColumn() == 0)
211 GMX_RELEASE_ASSERT(currentGroup_ < ssize(groups_), "Too few groups initialized");
212 if (bFirstFrame || groups_[currentGroup_].bDynamic)
214 if (!bFirstFrame || currentGroup_ > 0)
216 std::fprintf(fp_, "\n\n");
218 std::string name = groups_[currentGroup_].name;
219 if (groups_[currentGroup_].bDynamic)
221 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
223 std::fprintf(fp_, "[ %s ]", name.c_str());
230 if (bFirstFrame || groups_[currentGroup_].bDynamic)
232 if (currentSize_ % 15 == 0)
234 std::fprintf(fp_, "\n");
236 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
243 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader& /*header*/) {}
246 void IndexFileWriterModule::dataFinished()
250 std::fprintf(fp_, "\n");
255 /********************************************************************
259 //! How to identify residues in output files.
260 enum class ResidueNumbering : int
266 //! Which atoms to write out to PDB files.
267 enum class PdbAtomsSelection : int
274 //! String values corresponding to ResidueNumbering.
275 const EnumerationArray<ResidueNumbering, const char*> c_residueNumberingTypeNames = { { "number",
277 //! String values corresponding to PdbAtomsSelection.
278 const EnumerationArray<PdbAtomsSelection, const char*> c_pdbAtomsTypeNames = { { "all", "maxsel",
281 class Select : public TrajectoryAnalysisModule
286 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
287 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
288 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
290 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
292 void finishAnalysis(int nframes) override;
293 void writeOutput() override;
300 std::string fnIndex_;
303 std::string fnOccupancy_;
305 std::string fnLifetime_;
309 bool bCumulativeLifetimes_;
310 ResidueNumbering resNumberType_;
311 PdbAtomsSelection pdbAtoms_;
313 //! The input topology.
314 const TopologyInformation* top_;
315 std::vector<int> totsize_;
320 AnalysisDataAverageModulePointer occupancyModule_;
321 AnalysisDataLifetimeModulePointer lifetimeModule_;
328 bCumulativeLifetimes_(true),
329 resNumberType_(ResidueNumbering::ByNumber),
330 pdbAtoms_(PdbAtomsSelection::All),
332 occupancyModule_(new AnalysisDataAverageModule()),
333 lifetimeModule_(new AnalysisDataLifetimeModule())
335 mdata_.addModule(occupancyModule_);
336 mdata_.addModule(lifetimeModule_);
338 registerAnalysisDataset(&sdata_, "size");
339 registerAnalysisDataset(&cdata_, "cfrac");
340 idata_.setColumnCount(0, 2);
341 idata_.setMultipoint(true);
342 registerAnalysisDataset(&idata_, "index");
343 registerAnalysisDataset(&mdata_, "mask");
344 occupancyModule_->setXAxis(1.0, 1.0);
345 registerBasicDataset(occupancyModule_.get(), "occupancy");
346 registerBasicDataset(lifetimeModule_.get(), "lifetime");
350 void Select::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
352 static const char* const desc[] = {
353 "[THISMODULE] writes out basic data about dynamic selections.",
354 "It can be used for some simple analyses, or the output can",
355 "be combined with output from other programs and/or external",
356 "analysis programs to calculate more complex things.",
357 "For detailed help on the selection syntax, please use",
358 "[TT]gmx help selections[tt].",
360 "Any combination of the output options is possible, but note",
361 "that [TT]-om[tt] only operates on the first selection.",
362 "Also note that if you provide no output options, no output is",
365 "With [TT]-os[tt], calculates the number of positions in each",
366 "selection for each frame. With [TT]-norm[tt], the output is",
367 "between 0 and 1 and describes the fraction from the maximum",
368 "number of positions (e.g., for selection 'resname RA and x < 5'",
369 "the maximum number of positions is the number of atoms in",
370 "RA residues). With [TT]-cfnorm[tt], the output is divided",
371 "by the fraction covered by the selection.",
372 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
375 "With [TT]-oc[tt], the fraction covered by each selection is",
376 "written out as a function of time.",
378 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
379 "written out as a function of time. In the output, the first",
380 "column contains the frame time, the second contains the number",
381 "of positions, followed by the atom/residue/molecule numbers.",
382 "If more than one selection is specified, the size of the second",
383 "group immediately follows the last number of the first group",
386 "With [TT]-on[tt], the selected atoms are written as a index file",
387 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
388 "is written as a selection group and for dynamic selections a",
389 "group is written for each frame.",
391 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
392 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
393 "numbers as they appear in the input file, while [TT]index[tt] prints",
394 "unique numbers assigned to the residues in the order they appear",
395 "in the input file, starting with 1. The former is more intuitive,",
396 "but if the input contains multiple residues with the same number,",
397 "the output can be less useful.",
399 "With [TT]-om[tt], a mask is printed for the first selection",
400 "as a function of time. Each line in the output corresponds to",
401 "one frame, and contains either 0/1 for each atom/residue/molecule",
402 "possibly selected. 1 stands for the atom/residue/molecule being",
403 "selected for the current frame, 0 for not selected.",
405 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
406 "the fraction of frames where the position is selected) is",
409 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
410 "column is filled with the occupancy fraction of each atom in the",
411 "selection. The coordinates in the PDB file will be those from the",
412 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
413 "appear in the output PDB file: with [TT]all[tt] all atoms are",
414 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
415 "selection are present, and with [TT]selected[tt] only atoms that are",
416 "selected at least in one frame are present.",
418 "With [TT]-olt[tt], a histogram is produced that shows the number of",
419 "selected positions as a function of the time the position was",
420 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
421 "subintervals of longer intervals are included in the histogram.[PAR]",
422 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
423 "dynamic selections.",
425 "To plot coordinates for selections, use [gmx-trajectory]."
428 settings->setHelpText(desc);
430 options->addOption(FileNameOption("os")
434 .defaultBasename("size")
435 .description("Number of positions in each selection"));
436 options->addOption(FileNameOption("oc")
440 .defaultBasename("cfrac")
441 .description("Covered fraction for each selection"));
442 options->addOption(FileNameOption("oi")
443 .filetype(eftGenericData)
446 .defaultBasename("index")
447 .description("Indices selected by each selection"));
448 options->addOption(FileNameOption("on")
452 .defaultBasename("index")
453 .description("Index file from the selection"));
454 options->addOption(FileNameOption("om")
458 .defaultBasename("mask")
459 .description("Mask for selected positions"));
460 options->addOption(FileNameOption("of")
463 .store(&fnOccupancy_)
464 .defaultBasename("occupancy")
465 .description("Occupied fraction for selected positions"));
467 FileNameOption("ofpdb")
471 .defaultBasename("occupancy")
472 .description("PDB file with occupied fraction for selected positions"));
473 options->addOption(FileNameOption("olt")
477 .defaultBasename("lifetime")
478 .description("Lifetime histogram"));
480 options->addOption(SelectionOption("select").storeVector(&sel_).required().multiValue().description(
481 "Selections to analyze"));
484 BooleanOption("norm").store(&bTotNorm_).description("Normalize by total number of positions with -os"));
486 BooleanOption("cfnorm").store(&bFracNorm_).description("Normalize by covered fraction with -os"));
487 options->addOption(EnumOption<ResidueNumbering>("resnr")
488 .store(&resNumberType_)
489 .enumValue(c_residueNumberingTypeNames)
490 .description("Residue number output type with -oi and -on"));
491 options->addOption(EnumOption<PdbAtomsSelection>("pdbatoms")
493 .enumValue(c_pdbAtomsTypeNames)
494 .description("Atoms to write with -ofpdb"));
495 options->addOption(BooleanOption("cumlt")
496 .store(&bCumulativeLifetimes_)
497 .description("Cumulate subintervals of longer intervals in -olt"));
500 void Select::optionsFinished(TrajectoryAnalysisSettings* settings)
504 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
505 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
509 void Select::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
511 bResInd_ = (resNumberType_ == ResidueNumbering::ByIndex);
513 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
515 i->initCoveredFraction(CFRAC_SOLIDANGLE);
518 // TODO: For large systems, a float may not have enough precision
519 sdata_.setColumnCount(0, sel_.size());
520 totsize_.reserve(sel_.size());
521 for (size_t g = 0; g < sel_.size(); ++g)
523 totsize_.push_back(sel_[g].posCount());
525 if (!fnSize_.empty())
527 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
528 plot->setFileName(fnSize_);
529 plot->setTitle("Selection size");
530 plot->setXAxisIsTime();
531 plot->setYLabel("Number");
532 for (size_t g = 0; g < sel_.size(); ++g)
534 plot->appendLegend(sel_[g].name());
536 sdata_.addModule(plot);
539 cdata_.setColumnCount(0, sel_.size());
540 if (!fnFrac_.empty())
542 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
543 plot->setFileName(fnFrac_);
544 plot->setTitle("Covered fraction");
545 plot->setXAxisIsTime();
546 plot->setYLabel("Fraction");
547 plot->setYFormat(6, 4);
548 for (size_t g = 0; g < sel_.size(); ++g)
550 plot->appendLegend(sel_[g].name());
552 cdata_.addModule(plot);
555 // TODO: For large systems, a float may not have enough precision
556 if (!fnIndex_.empty())
558 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
559 plot->setFileName(fnIndex_);
560 plot->setPlainOutput(true);
561 plot->setYFormat(4, 0);
562 idata_.addModule(plot);
566 std::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
567 writer->setFileName(fnNdx_);
568 for (size_t g = 0; g < sel_.size(); ++g)
570 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
572 idata_.addModule(writer);
575 mdata_.setDataSetCount(sel_.size());
576 for (size_t g = 0; g < sel_.size(); ++g)
578 mdata_.setColumnCount(g, sel_[g].posCount());
580 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
581 if (!fnMask_.empty())
583 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
584 plot->setFileName(fnMask_);
585 plot->setTitle("Selection mask");
586 plot->setXAxisIsTime();
587 plot->setYLabel("Occupancy");
588 plot->setYFormat(1, 0);
589 // TODO: Add legend? (there can be massive amount of columns)
590 mdata_.addModule(plot);
592 if (!fnOccupancy_.empty())
594 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
595 plot->setFileName(fnOccupancy_);
596 plot->setTitle("Fraction of time selection matches");
597 plot->setXLabel("Selected position");
598 plot->setYLabel("Occupied fraction");
599 for (size_t g = 0; g < sel_.size(); ++g)
601 plot->appendLegend(sel_[g].name());
603 occupancyModule_->addModule(plot);
605 if (!fnLifetime_.empty())
607 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
608 plot->setFileName(fnLifetime_);
609 plot->setTitle("Lifetime histogram");
610 plot->setXAxisIsTime();
611 plot->setYLabel("Number of occurrences");
612 for (size_t g = 0; g < sel_.size(); ++g)
614 plot->appendLegend(sel_[g].name());
616 lifetimeModule_->addModule(plot);
623 void Select::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* /* pbc */, TrajectoryAnalysisModuleData* pdata)
625 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
626 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
627 AnalysisDataHandle idh = pdata->dataHandle(idata_);
628 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
629 const SelectionList& sel = pdata->parallelSelections(sel_);
631 sdh.startFrame(frnr, fr.time);
632 for (size_t g = 0; g < sel.size(); ++g)
634 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
637 normfac /= totsize_[g];
639 sdh.setPoint(g, sel[g].posCount() * normfac);
643 cdh.startFrame(frnr, fr.time);
644 for (size_t g = 0; g < sel.size(); ++g)
646 cdh.setPoint(g, sel[g].coveredFraction());
650 idh.startFrame(frnr, fr.time);
651 for (size_t g = 0; g < sel.size(); ++g)
653 idh.setPoint(0, sel[g].posCount());
654 idh.finishPointSet();
655 for (int i = 0; i < sel[g].posCount(); ++i)
657 const SelectionPosition& p = sel[g].position(i);
658 if (sel[g].type() == INDEX_RES && !bResInd_)
660 idh.setPoint(1, top_->atoms()->resinfo[p.mappedId()].nr);
664 idh.setPoint(1, p.mappedId() + 1);
666 idh.finishPointSet();
671 mdh.startFrame(frnr, fr.time);
672 for (size_t g = 0; g < sel.size(); ++g)
674 mdh.selectDataSet(g);
675 for (int i = 0; i < totsize_[g]; ++i)
679 for (int i = 0; i < sel[g].posCount(); ++i)
681 mdh.setPoint(sel[g].position(i).refId(), 1);
688 void Select::finishAnalysis(int /*nframes*/) {}
691 void Select::writeOutput()
695 GMX_RELEASE_ASSERT(top_->hasTopology(),
696 "Topology should have been loaded or an error given earlier");
697 auto atoms = top_->copyAtoms();
698 if (!atoms->havePdbInfo)
700 snew(atoms->pdbinfo, atoms->nr);
701 atoms->havePdbInfo = TRUE;
703 for (int i = 0; i < atoms->nr; ++i)
705 atoms->pdbinfo[i].occup = 0.0;
707 for (size_t g = 0; g < sel_.size(); ++g)
709 for (int i = 0; i < sel_[g].posCount(); ++i)
711 ArrayRef<const int> atomIndices = sel_[g].position(i).atomIndices();
712 ArrayRef<const int>::const_iterator ai;
713 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
715 atoms->pdbinfo[*ai].occup += occupancyModule_->average(g, i);
720 std::vector<RVec> x = copyOf(top_->x());
722 clear_trxframe(&fr, TRUE);
724 fr.atoms = atoms.get();
727 fr.x = as_rvec_array(x.data());
728 top_->getBox(fr.box);
732 case PdbAtomsSelection::All:
734 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
735 write_trxframe(status, &fr, nullptr);
739 case PdbAtomsSelection::MaxSelection:
741 std::set<int> atomIndicesSet;
742 for (size_t g = 0; g < sel_.size(); ++g)
744 ArrayRef<const int> atomIndices = sel_[g].atomIndices();
745 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
747 std::vector<int> allAtomIndices(atomIndicesSet.begin(), atomIndicesSet.end());
748 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
749 write_trxframe_indexed(status, &fr, allAtomIndices.size(), allAtomIndices.data(), nullptr);
753 case PdbAtomsSelection::Selected:
755 std::vector<int> indices;
756 for (int i = 0; i < atoms->nr; ++i)
758 if (atoms->pdbinfo[i].occup > 0.0)
760 indices.push_back(i);
763 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
764 write_trxframe_indexed(status, &fr, indices.size(), indices.data(), nullptr);
768 case PdbAtomsSelection::Count:
769 GMX_RELEASE_ASSERT(false,
770 "Mismatch between -pdbatoms enum values and implementation");
777 const char SelectInfo::name[] = "select";
778 const char SelectInfo::shortDescription[] = "Print general information about selections";
780 TrajectoryAnalysisModulePointer SelectInfo::create()
782 return TrajectoryAnalysisModulePointer(new Select);
785 } // namespace analysismodules