2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements gmx::analysismodules::Select.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
54 #include "gromacs/analysisdata/analysisdata.h"
55 #include "gromacs/analysisdata/dataframe.h"
56 #include "gromacs/analysisdata/datamodule.h"
57 #include "gromacs/analysisdata/modules/average.h"
58 #include "gromacs/analysisdata/modules/lifetime.h"
59 #include "gromacs/analysisdata/modules/plot.h"
60 #include "gromacs/fileio/gmxfio.h"
61 #include "gromacs/fileio/trxio.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/ioptionscontainer.h"
65 #include "gromacs/selection/selection.h"
66 #include "gromacs/selection/selectionoption.h"
67 #include "gromacs/topology/topology.h"
68 #include "gromacs/trajectory/trajectoryframe.h"
69 #include "gromacs/trajectoryanalysis/analysissettings.h"
70 #include "gromacs/trajectoryanalysis/topologyinformation.h"
71 #include "gromacs/utility/arrayref.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/gmxassert.h"
74 #include "gromacs/utility/smalloc.h"
75 #include "gromacs/utility/stringutil.h"
76 #include "gromacs/utility/unique_cptr.h"
81 namespace analysismodules
88 * Data module for writing index files.
90 * \ingroup module_analysisdata
92 class IndexFileWriterModule : public AnalysisDataModuleSerial
95 IndexFileWriterModule();
96 ~IndexFileWriterModule() override;
98 //! Sets the file name to write the index file to.
99 void setFileName(const std::string& fnm);
101 * Adds information about a group to be printed.
103 * Must be called for each group present in the input data.
105 void addGroup(const std::string& name, bool bDynamic);
107 int flags() const override;
109 void dataStarted(AbstractAnalysisData* data) override;
110 void frameStarted(const AnalysisDataFrameHeader& header) override;
111 void pointsAdded(const AnalysisDataPointSetRef& points) override;
112 void frameFinished(const AnalysisDataFrameHeader& header) override;
113 void dataFinished() override;
120 GroupInfo(const std::string& name, bool bDynamic) : name(name), bDynamic(bDynamic) {}
127 std::vector<GroupInfo> groups_;
134 /********************************************************************
135 * IndexFileWriterModule
138 IndexFileWriterModule::IndexFileWriterModule() :
147 IndexFileWriterModule::~IndexFileWriterModule()
153 void IndexFileWriterModule::closeFile()
163 void IndexFileWriterModule::setFileName(const std::string& fnm)
169 void IndexFileWriterModule::addGroup(const std::string& name, bool bDynamic)
171 std::string newName(name);
172 std::replace(newName.begin(), newName.end(), ' ', '_');
173 groups_.emplace_back(newName, bDynamic);
177 int IndexFileWriterModule::flags() const
179 return efAllowMulticolumn | efAllowMultipoint;
183 void IndexFileWriterModule::dataStarted(AbstractAnalysisData* /*data*/)
187 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
192 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader& /*header*/)
194 bAnyWritten_ = false;
199 void IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef& points)
205 bool bFirstFrame = (points.frameIndex() == 0);
206 if (points.firstColumn() == 0)
209 GMX_RELEASE_ASSERT(currentGroup_ < ssize(groups_), "Too few groups initialized");
210 if (bFirstFrame || groups_[currentGroup_].bDynamic)
212 if (!bFirstFrame || currentGroup_ > 0)
214 std::fprintf(fp_, "\n\n");
216 std::string name = groups_[currentGroup_].name;
217 if (groups_[currentGroup_].bDynamic)
219 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
221 std::fprintf(fp_, "[ %s ]", name.c_str());
228 if (bFirstFrame || groups_[currentGroup_].bDynamic)
230 if (currentSize_ % 15 == 0)
232 std::fprintf(fp_, "\n");
234 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
241 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader& /*header*/) {}
244 void IndexFileWriterModule::dataFinished()
248 std::fprintf(fp_, "\n");
253 /********************************************************************
257 //! How to identify residues in output files.
258 enum ResidueNumbering
260 ResidueNumbering_ByNumber,
261 ResidueNumbering_ByIndex
263 //! Which atoms to write out to PDB files.
264 enum PdbAtomsSelection
266 PdbAtomsSelection_All,
267 PdbAtomsSelection_MaxSelection,
268 PdbAtomsSelection_Selected
270 //! String values corresponding to ResidueNumbering.
271 const char* const cResNumberEnum[] = { "number", "index" };
272 //! String values corresponding to PdbAtomsSelection.
273 const char* const cPDBAtomsEnum[] = { "all", "maxsel", "selected" };
275 class Select : public TrajectoryAnalysisModule
280 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
281 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
282 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
284 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
286 void finishAnalysis(int nframes) override;
287 void writeOutput() override;
294 std::string fnIndex_;
297 std::string fnOccupancy_;
299 std::string fnLifetime_;
303 bool bCumulativeLifetimes_;
304 ResidueNumbering resNumberType_;
305 PdbAtomsSelection pdbAtoms_;
307 //! The input topology.
308 const TopologyInformation* top_;
309 std::vector<int> totsize_;
314 AnalysisDataAverageModulePointer occupancyModule_;
315 AnalysisDataLifetimeModulePointer lifetimeModule_;
322 bCumulativeLifetimes_(true),
323 resNumberType_(ResidueNumbering_ByNumber),
324 pdbAtoms_(PdbAtomsSelection_All),
326 occupancyModule_(new AnalysisDataAverageModule()),
327 lifetimeModule_(new AnalysisDataLifetimeModule())
329 mdata_.addModule(occupancyModule_);
330 mdata_.addModule(lifetimeModule_);
332 registerAnalysisDataset(&sdata_, "size");
333 registerAnalysisDataset(&cdata_, "cfrac");
334 idata_.setColumnCount(0, 2);
335 idata_.setMultipoint(true);
336 registerAnalysisDataset(&idata_, "index");
337 registerAnalysisDataset(&mdata_, "mask");
338 occupancyModule_->setXAxis(1.0, 1.0);
339 registerBasicDataset(occupancyModule_.get(), "occupancy");
340 registerBasicDataset(lifetimeModule_.get(), "lifetime");
344 void Select::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
346 static const char* const desc[] = {
347 "[THISMODULE] writes out basic data about dynamic selections.",
348 "It can be used for some simple analyses, or the output can",
349 "be combined with output from other programs and/or external",
350 "analysis programs to calculate more complex things.",
351 "For detailed help on the selection syntax, please use",
352 "[TT]gmx help selections[tt].",
354 "Any combination of the output options is possible, but note",
355 "that [TT]-om[tt] only operates on the first selection.",
356 "Also note that if you provide no output options, no output is",
359 "With [TT]-os[tt], calculates the number of positions in each",
360 "selection for each frame. With [TT]-norm[tt], the output is",
361 "between 0 and 1 and describes the fraction from the maximum",
362 "number of positions (e.g., for selection 'resname RA and x < 5'",
363 "the maximum number of positions is the number of atoms in",
364 "RA residues). With [TT]-cfnorm[tt], the output is divided",
365 "by the fraction covered by the selection.",
366 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
369 "With [TT]-oc[tt], the fraction covered by each selection is",
370 "written out as a function of time.",
372 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
373 "written out as a function of time. In the output, the first",
374 "column contains the frame time, the second contains the number",
375 "of positions, followed by the atom/residue/molecule numbers.",
376 "If more than one selection is specified, the size of the second",
377 "group immediately follows the last number of the first group",
380 "With [TT]-on[tt], the selected atoms are written as a index file",
381 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
382 "is written as a selection group and for dynamic selections a",
383 "group is written for each frame.",
385 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
386 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
387 "numbers as they appear in the input file, while [TT]index[tt] prints",
388 "unique numbers assigned to the residues in the order they appear",
389 "in the input file, starting with 1. The former is more intuitive,",
390 "but if the input contains multiple residues with the same number,",
391 "the output can be less useful.",
393 "With [TT]-om[tt], a mask is printed for the first selection",
394 "as a function of time. Each line in the output corresponds to",
395 "one frame, and contains either 0/1 for each atom/residue/molecule",
396 "possibly selected. 1 stands for the atom/residue/molecule being",
397 "selected for the current frame, 0 for not selected.",
399 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
400 "the fraction of frames where the position is selected) is",
403 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
404 "column is filled with the occupancy fraction of each atom in the",
405 "selection. The coordinates in the PDB file will be those from the",
406 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
407 "appear in the output PDB file: with [TT]all[tt] all atoms are",
408 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
409 "selection are present, and with [TT]selected[tt] only atoms that are",
410 "selected at least in one frame are present.",
412 "With [TT]-olt[tt], a histogram is produced that shows the number of",
413 "selected positions as a function of the time the position was",
414 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
415 "subintervals of longer intervals are included in the histogram.[PAR]",
416 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
417 "dynamic selections.",
419 "To plot coordinates for selections, use [gmx-trajectory]."
422 settings->setHelpText(desc);
424 options->addOption(FileNameOption("os")
428 .defaultBasename("size")
429 .description("Number of positions in each selection"));
430 options->addOption(FileNameOption("oc")
434 .defaultBasename("cfrac")
435 .description("Covered fraction for each selection"));
436 options->addOption(FileNameOption("oi")
437 .filetype(eftGenericData)
440 .defaultBasename("index")
441 .description("Indices selected by each selection"));
442 options->addOption(FileNameOption("on")
446 .defaultBasename("index")
447 .description("Index file from the selection"));
448 options->addOption(FileNameOption("om")
452 .defaultBasename("mask")
453 .description("Mask for selected positions"));
454 options->addOption(FileNameOption("of")
457 .store(&fnOccupancy_)
458 .defaultBasename("occupancy")
459 .description("Occupied fraction for selected positions"));
461 FileNameOption("ofpdb")
465 .defaultBasename("occupancy")
466 .description("PDB file with occupied fraction for selected positions"));
467 options->addOption(FileNameOption("olt")
471 .defaultBasename("lifetime")
472 .description("Lifetime histogram"));
474 options->addOption(SelectionOption("select").storeVector(&sel_).required().multiValue().description(
475 "Selections to analyze"));
478 BooleanOption("norm").store(&bTotNorm_).description("Normalize by total number of positions with -os"));
480 BooleanOption("cfnorm").store(&bFracNorm_).description("Normalize by covered fraction with -os"));
481 options->addOption(EnumOption<ResidueNumbering>("resnr")
482 .store(&resNumberType_)
483 .enumValue(cResNumberEnum)
484 .description("Residue number output type with -oi and -on"));
485 options->addOption(EnumOption<PdbAtomsSelection>("pdbatoms")
487 .enumValue(cPDBAtomsEnum)
488 .description("Atoms to write with -ofpdb"));
489 options->addOption(BooleanOption("cumlt")
490 .store(&bCumulativeLifetimes_)
491 .description("Cumulate subintervals of longer intervals in -olt"));
494 void Select::optionsFinished(TrajectoryAnalysisSettings* settings)
498 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
499 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
503 void Select::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
505 bResInd_ = (resNumberType_ == ResidueNumbering_ByIndex);
507 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
509 i->initCoveredFraction(CFRAC_SOLIDANGLE);
512 // TODO: For large systems, a float may not have enough precision
513 sdata_.setColumnCount(0, sel_.size());
514 totsize_.reserve(sel_.size());
515 for (size_t g = 0; g < sel_.size(); ++g)
517 totsize_.push_back(sel_[g].posCount());
519 if (!fnSize_.empty())
521 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
522 plot->setFileName(fnSize_);
523 plot->setTitle("Selection size");
524 plot->setXAxisIsTime();
525 plot->setYLabel("Number");
526 for (size_t g = 0; g < sel_.size(); ++g)
528 plot->appendLegend(sel_[g].name());
530 sdata_.addModule(plot);
533 cdata_.setColumnCount(0, sel_.size());
534 if (!fnFrac_.empty())
536 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
537 plot->setFileName(fnFrac_);
538 plot->setTitle("Covered fraction");
539 plot->setXAxisIsTime();
540 plot->setYLabel("Fraction");
541 plot->setYFormat(6, 4);
542 for (size_t g = 0; g < sel_.size(); ++g)
544 plot->appendLegend(sel_[g].name());
546 cdata_.addModule(plot);
549 // TODO: For large systems, a float may not have enough precision
550 if (!fnIndex_.empty())
552 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
553 plot->setFileName(fnIndex_);
554 plot->setPlainOutput(true);
555 plot->setYFormat(4, 0);
556 idata_.addModule(plot);
560 std::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
561 writer->setFileName(fnNdx_);
562 for (size_t g = 0; g < sel_.size(); ++g)
564 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
566 idata_.addModule(writer);
569 mdata_.setDataSetCount(sel_.size());
570 for (size_t g = 0; g < sel_.size(); ++g)
572 mdata_.setColumnCount(g, sel_[g].posCount());
574 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
575 if (!fnMask_.empty())
577 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
578 plot->setFileName(fnMask_);
579 plot->setTitle("Selection mask");
580 plot->setXAxisIsTime();
581 plot->setYLabel("Occupancy");
582 plot->setYFormat(1, 0);
583 // TODO: Add legend? (there can be massive amount of columns)
584 mdata_.addModule(plot);
586 if (!fnOccupancy_.empty())
588 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
589 plot->setFileName(fnOccupancy_);
590 plot->setTitle("Fraction of time selection matches");
591 plot->setXLabel("Selected position");
592 plot->setYLabel("Occupied fraction");
593 for (size_t g = 0; g < sel_.size(); ++g)
595 plot->appendLegend(sel_[g].name());
597 occupancyModule_->addModule(plot);
599 if (!fnLifetime_.empty())
601 AnalysisDataPlotModulePointer plot(new AnalysisDataPlotModule(settings.plotSettings()));
602 plot->setFileName(fnLifetime_);
603 plot->setTitle("Lifetime histogram");
604 plot->setXAxisIsTime();
605 plot->setYLabel("Number of occurrences");
606 for (size_t g = 0; g < sel_.size(); ++g)
608 plot->appendLegend(sel_[g].name());
610 lifetimeModule_->addModule(plot);
617 void Select::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* /* pbc */, TrajectoryAnalysisModuleData* pdata)
619 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
620 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
621 AnalysisDataHandle idh = pdata->dataHandle(idata_);
622 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
623 const SelectionList& sel = pdata->parallelSelections(sel_);
625 sdh.startFrame(frnr, fr.time);
626 for (size_t g = 0; g < sel.size(); ++g)
628 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
631 normfac /= totsize_[g];
633 sdh.setPoint(g, sel[g].posCount() * normfac);
637 cdh.startFrame(frnr, fr.time);
638 for (size_t g = 0; g < sel.size(); ++g)
640 cdh.setPoint(g, sel[g].coveredFraction());
644 idh.startFrame(frnr, fr.time);
645 for (size_t g = 0; g < sel.size(); ++g)
647 idh.setPoint(0, sel[g].posCount());
648 idh.finishPointSet();
649 for (int i = 0; i < sel[g].posCount(); ++i)
651 const SelectionPosition& p = sel[g].position(i);
652 if (sel[g].type() == INDEX_RES && !bResInd_)
654 idh.setPoint(1, top_->atoms()->resinfo[p.mappedId()].nr);
658 idh.setPoint(1, p.mappedId() + 1);
660 idh.finishPointSet();
665 mdh.startFrame(frnr, fr.time);
666 for (size_t g = 0; g < sel.size(); ++g)
668 mdh.selectDataSet(g);
669 for (int i = 0; i < totsize_[g]; ++i)
673 for (int i = 0; i < sel[g].posCount(); ++i)
675 mdh.setPoint(sel[g].position(i).refId(), 1);
682 void Select::finishAnalysis(int /*nframes*/) {}
685 void Select::writeOutput()
689 GMX_RELEASE_ASSERT(top_->hasTopology(),
690 "Topology should have been loaded or an error given earlier");
691 auto atoms = top_->copyAtoms();
692 if (!atoms->havePdbInfo)
694 snew(atoms->pdbinfo, atoms->nr);
695 atoms->havePdbInfo = TRUE;
697 for (int i = 0; i < atoms->nr; ++i)
699 atoms->pdbinfo[i].occup = 0.0;
701 for (size_t g = 0; g < sel_.size(); ++g)
703 for (int i = 0; i < sel_[g].posCount(); ++i)
705 ArrayRef<const int> atomIndices = sel_[g].position(i).atomIndices();
706 ArrayRef<const int>::const_iterator ai;
707 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
709 atoms->pdbinfo[*ai].occup += occupancyModule_->average(g, i);
714 std::vector<RVec> x = copyOf(top_->x());
716 clear_trxframe(&fr, TRUE);
718 fr.atoms = atoms.get();
721 fr.x = as_rvec_array(x.data());
722 top_->getBox(fr.box);
726 case PdbAtomsSelection_All:
728 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
729 write_trxframe(status, &fr, nullptr);
733 case PdbAtomsSelection_MaxSelection:
735 std::set<int> atomIndicesSet;
736 for (size_t g = 0; g < sel_.size(); ++g)
738 ArrayRef<const int> atomIndices = sel_[g].atomIndices();
739 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
741 std::vector<int> allAtomIndices(atomIndicesSet.begin(), atomIndicesSet.end());
742 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
743 write_trxframe_indexed(status, &fr, allAtomIndices.size(), allAtomIndices.data(), nullptr);
747 case PdbAtomsSelection_Selected:
749 std::vector<int> indices;
750 for (int i = 0; i < atoms->nr; ++i)
752 if (atoms->pdbinfo[i].occup > 0.0)
754 indices.push_back(i);
757 t_trxstatus* status = open_trx(fnPDB_.c_str(), "w");
758 write_trxframe_indexed(status, &fr, indices.size(), indices.data(), nullptr);
763 GMX_RELEASE_ASSERT(false,
764 "Mismatch between -pdbatoms enum values and implementation");
771 const char SelectInfo::name[] = "select";
772 const char SelectInfo::shortDescription[] = "Print general information about selections";
774 TrajectoryAnalysisModulePointer SelectInfo::create()
776 return TrajectoryAnalysisModulePointer(new Select);
779 } // namespace analysismodules