2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013 by the GROMACS development team.
5 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
6 * Copyright (c) 2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Select.
41 * \author Teemu Murtola <teemu.murtola@gmail.com>
42 * \ingroup module_trajectoryanalysis
56 #include "gromacs/analysisdata/analysisdata.h"
57 #include "gromacs/analysisdata/dataframe.h"
58 #include "gromacs/analysisdata/datamodule.h"
59 #include "gromacs/analysisdata/modules/average.h"
60 #include "gromacs/analysisdata/modules/lifetime.h"
61 #include "gromacs/analysisdata/modules/plot.h"
62 #include "gromacs/fileio/gmxfio.h"
63 #include "gromacs/fileio/trxio.h"
64 #include "gromacs/options/basicoptions.h"
65 #include "gromacs/options/filenameoption.h"
66 #include "gromacs/options/ioptionscontainer.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectory/trajectoryframe.h"
71 #include "gromacs/trajectoryanalysis/analysissettings.h"
72 #include "gromacs/trajectoryanalysis/topologyinformation.h"
73 #include "gromacs/utility/arrayref.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/gmxassert.h"
76 #include "gromacs/utility/smalloc.h"
77 #include "gromacs/utility/stringutil.h"
78 #include "gromacs/utility/unique_cptr.h"
83 namespace analysismodules
90 * Data module for writing index files.
92 * \ingroup module_analysisdata
94 class IndexFileWriterModule : public AnalysisDataModuleSerial
97 IndexFileWriterModule();
98 ~IndexFileWriterModule() override;
100 //! Sets the file name to write the index file to.
101 void setFileName(const std::string& fnm);
103 * Adds information about a group to be printed.
105 * Must be called for each group present in the input data.
107 void addGroup(const std::string& name, bool bDynamic);
109 int flags() const override;
111 void dataStarted(AbstractAnalysisData* data) override;
112 void frameStarted(const AnalysisDataFrameHeader& header) override;
113 void pointsAdded(const AnalysisDataPointSetRef& points) override;
114 void frameFinished(const AnalysisDataFrameHeader& header) override;
115 void dataFinished() override;
122 GroupInfo(const std::string& name, bool bDynamic) : name(name), bDynamic(bDynamic) {}
129 std::vector<GroupInfo> groups_;
136 /********************************************************************
137 * IndexFileWriterModule
140 IndexFileWriterModule::IndexFileWriterModule() :
149 IndexFileWriterModule::~IndexFileWriterModule()
155 void IndexFileWriterModule::closeFile()
165 void IndexFileWriterModule::setFileName(const std::string& fnm)
171 void IndexFileWriterModule::addGroup(const std::string& name, bool bDynamic)
173 std::string newName(name);
174 std::replace(newName.begin(), newName.end(), ' ', '_');
175 groups_.emplace_back(newName, bDynamic);
179 int IndexFileWriterModule::flags() const
181 return efAllowMulticolumn | efAllowMultipoint;
185 void IndexFileWriterModule::dataStarted(AbstractAnalysisData* /*data*/)
189 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
194 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader& /*header*/)
196 bAnyWritten_ = false;
201 void IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef& points)
207 bool bFirstFrame = (points.frameIndex() == 0);
208 if (points.firstColumn() == 0)
211 GMX_RELEASE_ASSERT(currentGroup_ < ssize(groups_), "Too few groups initialized");
212 if (bFirstFrame || groups_[currentGroup_].bDynamic)
214 if (!bFirstFrame || currentGroup_ > 0)
216 std::fprintf(fp_, "\n\n");
218 std::string name = groups_[currentGroup_].name;
219 if (groups_[currentGroup_].bDynamic)
221 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
223 std::fprintf(fp_, "[ %s ]", name.c_str());
230 if (bFirstFrame || groups_[currentGroup_].bDynamic)
232 if (currentSize_ % 15 == 0)
234 std::fprintf(fp_, "\n");
236 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
243 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader& /*header*/) {}
246 void IndexFileWriterModule::dataFinished()
250 std::fprintf(fp_, "\n");
255 /********************************************************************
259 //! How to identify residues in output files.
260 enum ResidueNumbering
262 ResidueNumbering_ByNumber,
263 ResidueNumbering_ByIndex
265 //! Which atoms to write out to PDB files.
266 enum PdbAtomsSelection
268 PdbAtomsSelection_All,
269 PdbAtomsSelection_MaxSelection,
270 PdbAtomsSelection_Selected
272 //! String values corresponding to ResidueNumbering.
273 const char* const cResNumberEnum[] = { "number", "index" };
274 //! String values corresponding to PdbAtomsSelection.
275 const char* const cPDBAtomsEnum[] = { "all", "maxsel", "selected" };
277 class Select : public TrajectoryAnalysisModule
282 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
283 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
284 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
286 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
288 void finishAnalysis(int nframes) override;
289 void writeOutput() override;
296 std::string fnIndex_;
299 std::string fnOccupancy_;
301 std::string fnLifetime_;
305 bool bCumulativeLifetimes_;
306 ResidueNumbering resNumberType_;
307 PdbAtomsSelection pdbAtoms_;
309 //! The input topology.
310 const TopologyInformation* top_;
311 std::vector<int> totsize_;
316 AnalysisDataAverageModulePointer occupancyModule_;
317 AnalysisDataLifetimeModulePointer lifetimeModule_;
324 bCumulativeLifetimes_(true),
325 resNumberType_(ResidueNumbering_ByNumber),
326 pdbAtoms_(PdbAtomsSelection_All),
328 occupancyModule_(new AnalysisDataAverageModule()),
329 lifetimeModule_(new AnalysisDataLifetimeModule())
331 mdata_.addModule(occupancyModule_);
332 mdata_.addModule(lifetimeModule_);
334 registerAnalysisDataset(&sdata_, "size");
335 registerAnalysisDataset(&cdata_, "cfrac");
336 idata_.setColumnCount(0, 2);
337 idata_.setMultipoint(true);
338 registerAnalysisDataset(&idata_, "index");
339 registerAnalysisDataset(&mdata_, "mask");
340 occupancyModule_->setXAxis(1.0, 1.0);
341 registerBasicDataset(occupancyModule_.get(), "occupancy");
342 registerBasicDataset(lifetimeModule_.get(), "lifetime");
346 void Select::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
348 static const char* const desc[] = {
349 "[THISMODULE] writes out basic data about dynamic selections.",
350 "It can be used for some simple analyses, or the output can",
351 "be combined with output from other programs and/or external",
352 "analysis programs to calculate more complex things.",
353 "For detailed help on the selection syntax, please use",
354 "[TT]gmx help selections[tt].",
356 "Any combination of the output options is possible, but note",
357 "that [TT]-om[tt] only operates on the first selection.",
358 "Also note that if you provide no output options, no output is",
361 "With [TT]-os[tt], calculates the number of positions in each",
362 "selection for each frame. With [TT]-norm[tt], the output is",
363 "between 0 and 1 and describes the fraction from the maximum",
364 "number of positions (e.g., for selection 'resname RA and x < 5'",
365 "the maximum number of positions is the number of atoms in",
366 "RA residues). With [TT]-cfnorm[tt], the output is divided",
367 "by the fraction covered by the selection.",
368 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
371 "With [TT]-oc[tt], the fraction covered by each selection is",
372 "written out as a function of time.",
374 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
375 "written out as a function of time. In the output, the first",
376 "column contains the frame time, the second contains the number",
377 "of positions, followed by the atom/residue/molecule numbers.",
378 "If more than one selection is specified, the size of the second",
379 "group immediately follows the last number of the first group",
382 "With [TT]-on[tt], the selected atoms are written as a index file",
383 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
384 "is written as a selection group and for dynamic selections a",
385 "group is written for each frame.",
387 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
388 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
389 "numbers as they appear in the input file, while [TT]index[tt] prints",
390 "unique numbers assigned to the residues in the order they appear",
391 "in the input file, starting with 1. The former is more intuitive,",
392 "but if the input contains multiple residues with the same number,",
393 "the output can be less useful.",
395 "With [TT]-om[tt], a mask is printed for the first selection",
396 "as a function of time. Each line in the output corresponds to",
397 "one frame, and contains either 0/1 for each atom/residue/molecule",
398 "possibly selected. 1 stands for the atom/residue/molecule being",
399 "selected for the current frame, 0 for not selected.",
401 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
402 "the fraction of frames where the position is selected) is",
405 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
406 "column is filled with the occupancy fraction of each atom in the",
407 "selection. The coordinates in the PDB file will be those from the",
408 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
409 "appear in the output PDB file: with [TT]all[tt] all atoms are",
410 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
411 "selection are present, and with [TT]selected[tt] only atoms that are",
412 "selected at least in one frame are present.",
414 "With [TT]-olt[tt], a histogram is produced that shows the number of",
415 "selected positions as a function of the time the position was",
416 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
417 "subintervals of longer intervals are included in the histogram.[PAR]",
418 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
419 "dynamic selections.",
421 "To plot coordinates for selections, use [gmx-trajectory]."
424 settings->setHelpText(desc);
426 options->addOption(FileNameOption("os")
430 .defaultBasename("size")
431 .description("Number of positions in each selection"));
432 options->addOption(FileNameOption("oc")
436 .defaultBasename("cfrac")
437 .description("Covered fraction for each selection"));
438 options->addOption(FileNameOption("oi")
439 .filetype(eftGenericData)
442 .defaultBasename("index")
443 .description("Indices selected by each selection"));
444 options->addOption(FileNameOption("on")
448 .defaultBasename("index")
449 .description("Index file from the selection"));
450 options->addOption(FileNameOption("om")
454 .defaultBasename("mask")
455 .description("Mask for selected positions"));
456 options->addOption(FileNameOption("of")
459 .store(&fnOccupancy_)
460 .defaultBasename("occupancy")
461 .description("Occupied fraction for selected positions"));
463 FileNameOption("ofpdb")
467 .defaultBasename("occupancy")
468 .description("PDB file with occupied fraction for selected positions"));
469 options->addOption(FileNameOption("olt")
473 .defaultBasename("lifetime")
474 .description("Lifetime histogram"));
476 options->addOption(SelectionOption("select").storeVector(&sel_).required().multiValue().description(
477 "Selections to analyze"));
480 BooleanOption("norm").store(&bTotNorm_).description("Normalize by total number of positions with -os"));
482 BooleanOption("cfnorm").store(&bFracNorm_).description("Normalize by covered fraction with -os"));
483 options->addOption(EnumOption<ResidueNumbering>("resnr")
484 .store(&resNumberType_)
485 .enumValue(cResNumberEnum)
486 .description("Residue number output type with -oi and -on"));
487 options->addOption(EnumOption<PdbAtomsSelection>("pdbatoms")
489 .enumValue(cPDBAtomsEnum)
490 .description("Atoms to write with -ofpdb"));
491 options->addOption(BooleanOption("cumlt")
492 .store(&bCumulativeLifetimes_)
493 .description("Cumulate subintervals of longer intervals in -olt"));
496 void Select::optionsFinished(TrajectoryAnalysisSettings* settings)
500 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
501 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
505 void Select::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
507 bResInd_ = (resNumberType_ == ResidueNumbering_ByIndex);
509 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
511 i->initCoveredFraction(CFRAC_SOLIDANGLE);
514 // TODO: For large systems, a float may not have enough precision
515 sdata_.setColumnCount(0, sel_.size());
516 totsize_.reserve(sel_.size());
517 for (size_t g = 0; g < sel_.size(); ++g)
519 totsize_.push_back(sel_[g].posCount());
521 if (!fnSize_.empty())
523 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
524 plot->setFileName(fnSize_);
525 plot->setTitle("Selection size");
526 plot->setXAxisIsTime();
527 plot->setYLabel("Number");
528 for (size_t g = 0; g < sel_.size(); ++g)
530 plot->appendLegend(sel_[g].name());
532 sdata_.addModule(plot);
535 cdata_.setColumnCount(0, sel_.size());
536 if (!fnFrac_.empty())
538 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
539 plot->setFileName(fnFrac_);
540 plot->setTitle("Covered fraction");
541 plot->setXAxisIsTime();
542 plot->setYLabel("Fraction");
543 plot->setYFormat(6, 4);
544 for (size_t g = 0; g < sel_.size(); ++g)
546 plot->appendLegend(sel_[g].name());
548 cdata_.addModule(plot);
551 // TODO: For large systems, a float may not have enough precision
552 if (!fnIndex_.empty())
554 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
555 plot->setFileName(fnIndex_);
556 plot->setPlainOutput(true);
557 plot->setYFormat(4, 0);
558 idata_.addModule(plot);
562 std::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
563 writer->setFileName(fnNdx_);
564 for (size_t g = 0; g < sel_.size(); ++g)
566 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
568 idata_.addModule(writer);
571 mdata_.setDataSetCount(sel_.size());
572 for (size_t g = 0; g < sel_.size(); ++g)
574 mdata_.setColumnCount(g, sel_[g].posCount());
576 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
577 if (!fnMask_.empty())
579 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
580 plot->setFileName(fnMask_);
581 plot->setTitle("Selection mask");
582 plot->setXAxisIsTime();
583 plot->setYLabel("Occupancy");
584 plot->setYFormat(1, 0);
585 // TODO: Add legend? (there can be massive amount of columns)
586 mdata_.addModule(plot);
588 if (!fnOccupancy_.empty())
590 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
591 plot->setFileName(fnOccupancy_);
592 plot->setTitle("Fraction of time selection matches");
593 plot->setXLabel("Selected position");
594 plot->setYLabel("Occupied fraction");
595 for (size_t g = 0; g < sel_.size(); ++g)
597 plot->appendLegend(sel_[g].name());
599 occupancyModule_->addModule(plot);
601 if (!fnLifetime_.empty())
603 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
604 plot->setFileName(fnLifetime_);
605 plot->setTitle("Lifetime histogram");
606 plot->setXAxisIsTime();
607 plot->setYLabel("Number of occurrences");
608 for (size_t g = 0; g < sel_.size(); ++g)
610 plot->appendLegend(sel_[g].name());
612 lifetimeModule_->addModule(plot);
619 void Select::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* /* pbc */, TrajectoryAnalysisModuleData* pdata)
621 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
622 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
623 AnalysisDataHandle idh = pdata->dataHandle(idata_);
624 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
625 const SelectionList& sel = pdata->parallelSelections(sel_);
627 sdh.startFrame(frnr, fr.time);
628 for (size_t g = 0; g < sel.size(); ++g)
630 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
633 normfac /= totsize_[g];
635 sdh.setPoint(g, sel[g].posCount() * normfac);
639 cdh.startFrame(frnr, fr.time);
640 for (size_t g = 0; g < sel.size(); ++g)
642 cdh.setPoint(g, sel[g].coveredFraction());
646 idh.startFrame(frnr, fr.time);
647 for (size_t g = 0; g < sel.size(); ++g)
649 idh.setPoint(0, sel[g].posCount());
650 idh.finishPointSet();
651 for (int i = 0; i < sel[g].posCount(); ++i)
653 const SelectionPosition& p = sel[g].position(i);
654 if (sel[g].type() == INDEX_RES && !bResInd_)
656 idh.setPoint(1, top_->atoms()->resinfo[p.mappedId()].nr);
660 idh.setPoint(1, p.mappedId() + 1);
662 idh.finishPointSet();
667 mdh.startFrame(frnr, fr.time);
668 for (size_t g = 0; g < sel.size(); ++g)
670 mdh.selectDataSet(g);
671 for (int i = 0; i < totsize_[g]; ++i)
675 for (int i = 0; i < sel[g].posCount(); ++i)
677 mdh.setPoint(sel[g].position(i).refId(), 1);
684 void Select::finishAnalysis(int /*nframes*/) {}
687 void Select::writeOutput()
691 GMX_RELEASE_ASSERT(top_->hasTopology(),
692 "Topology should have been loaded or an error given earlier");
693 auto atoms = top_->copyAtoms();
694 if (!atoms->havePdbInfo)
696 snew(atoms->pdbinfo, atoms->nr);
697 atoms->havePdbInfo = TRUE;
699 for (int i = 0; i < atoms->nr; ++i)
701 atoms->pdbinfo[i].occup = 0.0;
703 for (size_t g = 0; g < sel_.size(); ++g)
705 for (int i = 0; i < sel_[g].posCount(); ++i)
707 ArrayRef<const int> atomIndices = sel_[g].position(i).atomIndices();
708 ArrayRef<const int>::const_iterator ai;
709 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
711 atoms->pdbinfo[*ai].occup += occupancyModule_->average(g, i);
716 std::vector<RVec> x = copyOf(top_->x());
718 clear_trxframe(&fr, TRUE);
720 fr.atoms = atoms.get();
723 fr.x = as_rvec_array(x.data());
724 top_->getBox(fr.box);
728 case PdbAtomsSelection_All:
730 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
731 write_trxframe(status, &fr, nullptr);
735 case PdbAtomsSelection_MaxSelection:
737 std::set<int> atomIndicesSet;
738 for (size_t g = 0; g < sel_.size(); ++g)
740 ArrayRef<const int> atomIndices = sel_[g].atomIndices();
741 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
743 std::vector<int> allAtomIndices(atomIndicesSet.begin(), atomIndicesSet.end());
744 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
745 write_trxframe_indexed(status, &fr, allAtomIndices.size(), allAtomIndices.data(), nullptr);
749 case PdbAtomsSelection_Selected:
751 std::vector<int> indices;
752 for (int i = 0; i < atoms->nr; ++i)
754 if (atoms->pdbinfo[i].occup > 0.0)
756 indices.push_back(i);
759 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
760 write_trxframe_indexed(status, &fr, indices.size(), indices.data(), nullptr);
765 GMX_RELEASE_ASSERT(false,
766 "Mismatch between -pdbatoms enum values and implementation");
773 const char SelectInfo::name[] = "select";
774 const char SelectInfo::shortDescription[] = "Print general information about selections";
776 TrajectoryAnalysisModulePointer SelectInfo::create()
778 return TrajectoryAnalysisModulePointer(new Select);
781 } // namespace analysismodules