2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014,2015,2016, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements gmx::analysismodules::Select.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
54 #include "gromacs/analysisdata/analysisdata.h"
55 #include "gromacs/analysisdata/dataframe.h"
56 #include "gromacs/analysisdata/datamodule.h"
57 #include "gromacs/analysisdata/modules/average.h"
58 #include "gromacs/analysisdata/modules/lifetime.h"
59 #include "gromacs/analysisdata/modules/plot.h"
60 #include "gromacs/fileio/gmxfio.h"
61 #include "gromacs/fileio/trxio.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/ioptionscontainer.h"
65 #include "gromacs/selection/selection.h"
66 #include "gromacs/selection/selectionoption.h"
67 #include "gromacs/topology/topology.h"
68 #include "gromacs/trajectory/trajectoryframe.h"
69 #include "gromacs/trajectoryanalysis/analysissettings.h"
70 #include "gromacs/utility/arrayref.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/gmxassert.h"
73 #include "gromacs/utility/smalloc.h"
74 #include "gromacs/utility/stringutil.h"
75 #include "gromacs/utility/unique_cptr.h"
80 namespace analysismodules
87 * Data module for writing index files.
89 * \ingroup module_analysisdata
91 class IndexFileWriterModule : public AnalysisDataModuleSerial
94 IndexFileWriterModule();
95 virtual ~IndexFileWriterModule();
97 //! Sets the file name to write the index file to.
98 void setFileName(const std::string &fnm);
100 * Adds information about a group to be printed.
102 * Must be called for each group present in the input data.
104 void addGroup(const std::string &name, bool bDynamic);
106 virtual int flags() const;
108 virtual void dataStarted(AbstractAnalysisData *data);
109 virtual void frameStarted(const AnalysisDataFrameHeader &header);
110 virtual void pointsAdded(const AnalysisDataPointSetRef &points);
111 virtual void frameFinished(const AnalysisDataFrameHeader &header);
112 virtual void dataFinished();
119 GroupInfo(const std::string &name, bool bDynamic)
120 : name(name), bDynamic(bDynamic)
128 std::vector<GroupInfo> groups_;
135 /********************************************************************
136 * IndexFileWriterModule
139 IndexFileWriterModule::IndexFileWriterModule()
140 : fp_(NULL), currentGroup_(-1), currentSize_(0), bAnyWritten_(false)
145 IndexFileWriterModule::~IndexFileWriterModule()
151 void IndexFileWriterModule::closeFile()
161 void IndexFileWriterModule::setFileName(const std::string &fnm)
167 void IndexFileWriterModule::addGroup(const std::string &name, bool bDynamic)
169 std::string newName(name);
170 std::replace(newName.begin(), newName.end(), ' ', '_');
171 groups_.emplace_back(newName, bDynamic);
175 int IndexFileWriterModule::flags() const
177 return efAllowMulticolumn | efAllowMultipoint;
181 void IndexFileWriterModule::dataStarted(AbstractAnalysisData * /*data*/)
185 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
190 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader & /*header*/)
192 bAnyWritten_ = false;
198 IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef &points)
204 bool bFirstFrame = (points.frameIndex() == 0);
205 if (points.firstColumn() == 0)
208 GMX_RELEASE_ASSERT(currentGroup_ < static_cast<int>(groups_.size()),
209 "Too few groups initialized");
210 if (bFirstFrame || groups_[currentGroup_].bDynamic)
212 if (!bFirstFrame || currentGroup_ > 0)
214 std::fprintf(fp_, "\n\n");
216 std::string name = groups_[currentGroup_].name;
217 if (groups_[currentGroup_].bDynamic)
219 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
221 std::fprintf(fp_, "[ %s ]", name.c_str());
228 if (bFirstFrame || groups_[currentGroup_].bDynamic)
230 if (currentSize_ % 15 == 0)
232 std::fprintf(fp_, "\n");
234 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
241 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader & /*header*/)
246 void IndexFileWriterModule::dataFinished()
250 std::fprintf(fp_, "\n");
255 /********************************************************************
259 //! How to identify residues in output files.
260 enum ResidueNumbering
262 ResidueNumbering_ByNumber,
263 ResidueNumbering_ByIndex
265 //! Which atoms to write out to PDB files.
266 enum PdbAtomsSelection
268 PdbAtomsSelection_All,
269 PdbAtomsSelection_MaxSelection,
270 PdbAtomsSelection_Selected
272 //! String values corresponding to ResidueNumbering.
273 const char *const cResNumberEnum[] = { "number", "index" };
274 //! String values corresponding to PdbAtomsSelection.
275 const char *const cPDBAtomsEnum[] = { "all", "maxsel", "selected" };
277 class Select : public TrajectoryAnalysisModule
282 virtual void initOptions(IOptionsContainer *options,
283 TrajectoryAnalysisSettings *settings);
284 virtual void optionsFinished(TrajectoryAnalysisSettings *settings);
285 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
286 const TopologyInformation &top);
288 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
289 TrajectoryAnalysisModuleData *pdata);
291 virtual void finishAnalysis(int nframes);
292 virtual void writeOutput();
299 std::string fnIndex_;
302 std::string fnOccupancy_;
304 std::string fnLifetime_;
308 bool bCumulativeLifetimes_;
309 ResidueNumbering resNumberType_;
310 PdbAtomsSelection pdbAtoms_;
312 const TopologyInformation *top_;
313 std::vector<int> totsize_;
318 AnalysisDataAverageModulePointer occupancyModule_;
319 AnalysisDataLifetimeModulePointer lifetimeModule_;
323 : bTotNorm_(false), bFracNorm_(false), bResInd_(false),
324 bCumulativeLifetimes_(true), resNumberType_(ResidueNumbering_ByNumber),
325 pdbAtoms_(PdbAtomsSelection_All), top_(NULL),
326 occupancyModule_(new AnalysisDataAverageModule()),
327 lifetimeModule_(new AnalysisDataLifetimeModule())
329 mdata_.addModule(occupancyModule_);
330 mdata_.addModule(lifetimeModule_);
332 registerAnalysisDataset(&sdata_, "size");
333 registerAnalysisDataset(&cdata_, "cfrac");
334 idata_.setColumnCount(0, 2);
335 idata_.setMultipoint(true);
336 registerAnalysisDataset(&idata_, "index");
337 registerAnalysisDataset(&mdata_, "mask");
338 occupancyModule_->setXAxis(1.0, 1.0);
339 registerBasicDataset(occupancyModule_.get(), "occupancy");
340 registerBasicDataset(lifetimeModule_.get(), "lifetime");
345 Select::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
347 static const char *const desc[] = {
348 "[THISMODULE] writes out basic data about dynamic selections.",
349 "It can be used for some simple analyses, or the output can",
350 "be combined with output from other programs and/or external",
351 "analysis programs to calculate more complex things.",
352 "For detailed help on the selection syntax, please use",
353 "[TT]gmx help selections[tt].",
355 "Any combination of the output options is possible, but note",
356 "that [TT]-om[tt] only operates on the first selection.",
357 "Also note that if you provide no output options, no output is",
360 "With [TT]-os[tt], calculates the number of positions in each",
361 "selection for each frame. With [TT]-norm[tt], the output is",
362 "between 0 and 1 and describes the fraction from the maximum",
363 "number of positions (e.g., for selection 'resname RA and x < 5'",
364 "the maximum number of positions is the number of atoms in",
365 "RA residues). With [TT]-cfnorm[tt], the output is divided",
366 "by the fraction covered by the selection.",
367 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
370 "With [TT]-oc[tt], the fraction covered by each selection is",
371 "written out as a function of time.",
373 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
374 "written out as a function of time. In the output, the first",
375 "column contains the frame time, the second contains the number",
376 "of positions, followed by the atom/residue/molecule numbers.",
377 "If more than one selection is specified, the size of the second",
378 "group immediately follows the last number of the first group",
381 "With [TT]-on[tt], the selected atoms are written as a index file",
382 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
383 "is written as a selection group and for dynamic selections a",
384 "group is written for each frame.",
386 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
387 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
388 "numbers as they appear in the input file, while [TT]index[tt] prints",
389 "unique numbers assigned to the residues in the order they appear",
390 "in the input file, starting with 1. The former is more intuitive,",
391 "but if the input contains multiple residues with the same number,",
392 "the output can be less useful.",
394 "With [TT]-om[tt], a mask is printed for the first selection",
395 "as a function of time. Each line in the output corresponds to",
396 "one frame, and contains either 0/1 for each atom/residue/molecule",
397 "possibly selected. 1 stands for the atom/residue/molecule being",
398 "selected for the current frame, 0 for not selected.",
400 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
401 "the fraction of frames where the position is selected) is",
404 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
405 "column is filled with the occupancy fraction of each atom in the",
406 "selection. The coordinates in the PDB file will be those from the",
407 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
408 "appear in the output PDB file: with [TT]all[tt] all atoms are",
409 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
410 "selection are present, and with [TT]selected[tt] only atoms that are",
411 "selected at least in one frame are present.",
413 "With [TT]-olt[tt], a histogram is produced that shows the number of",
414 "selected positions as a function of the time the position was",
415 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
416 "subintervals of longer intervals are included in the histogram.[PAR]",
417 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
418 "dynamic selections.",
420 "To plot coordinates for selections, use [gmx-trajectory]."
423 settings->setHelpText(desc);
425 options->addOption(FileNameOption("os").filetype(eftPlot).outputFile()
426 .store(&fnSize_).defaultBasename("size")
427 .description("Number of positions in each selection"));
428 options->addOption(FileNameOption("oc").filetype(eftPlot).outputFile()
429 .store(&fnFrac_).defaultBasename("cfrac")
430 .description("Covered fraction for each selection"));
431 options->addOption(FileNameOption("oi").filetype(eftGenericData).outputFile()
432 .store(&fnIndex_).defaultBasename("index")
433 .description("Indices selected by each selection"));
434 options->addOption(FileNameOption("on").filetype(eftIndex).outputFile()
435 .store(&fnNdx_).defaultBasename("index")
436 .description("Index file from the selection"));
437 options->addOption(FileNameOption("om").filetype(eftPlot).outputFile()
438 .store(&fnMask_).defaultBasename("mask")
439 .description("Mask for selected positions"));
440 options->addOption(FileNameOption("of").filetype(eftPlot).outputFile()
441 .store(&fnOccupancy_).defaultBasename("occupancy")
442 .description("Occupied fraction for selected positions"));
443 options->addOption(FileNameOption("ofpdb").filetype(eftPDB).outputFile()
444 .store(&fnPDB_).defaultBasename("occupancy")
445 .description("PDB file with occupied fraction for selected positions"));
446 options->addOption(FileNameOption("olt").filetype(eftPlot).outputFile()
447 .store(&fnLifetime_).defaultBasename("lifetime")
448 .description("Lifetime histogram"));
450 options->addOption(SelectionOption("select").storeVector(&sel_)
451 .required().multiValue()
452 .description("Selections to analyze"));
454 options->addOption(BooleanOption("norm").store(&bTotNorm_)
455 .description("Normalize by total number of positions with -os"));
456 options->addOption(BooleanOption("cfnorm").store(&bFracNorm_)
457 .description("Normalize by covered fraction with -os"));
458 options->addOption(EnumOption<ResidueNumbering>("resnr").store(&resNumberType_)
459 .enumValue(cResNumberEnum)
460 .description("Residue number output type with -oi and -on"));
461 options->addOption(EnumOption<PdbAtomsSelection>("pdbatoms").store(&pdbAtoms_)
462 .enumValue(cPDBAtomsEnum)
463 .description("Atoms to write with -ofpdb"));
464 options->addOption(BooleanOption("cumlt").store(&bCumulativeLifetimes_)
465 .description("Cumulate subintervals of longer intervals in -olt"));
469 Select::optionsFinished(TrajectoryAnalysisSettings *settings)
473 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
474 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
479 Select::initAnalysis(const TrajectoryAnalysisSettings &settings,
480 const TopologyInformation &top)
482 bResInd_ = (resNumberType_ == ResidueNumbering_ByIndex);
484 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
486 i->initCoveredFraction(CFRAC_SOLIDANGLE);
489 // TODO: For large systems, a float may not have enough precision
490 sdata_.setColumnCount(0, sel_.size());
491 totsize_.reserve(sel_.size());
492 for (size_t g = 0; g < sel_.size(); ++g)
494 totsize_.push_back(sel_[g].posCount());
496 if (!fnSize_.empty())
498 AnalysisDataPlotModulePointer plot(
499 new AnalysisDataPlotModule(settings.plotSettings()));
500 plot->setFileName(fnSize_);
501 plot->setTitle("Selection size");
502 plot->setXAxisIsTime();
503 plot->setYLabel("Number");
504 for (size_t g = 0; g < sel_.size(); ++g)
506 plot->appendLegend(sel_[g].name());
508 sdata_.addModule(plot);
511 cdata_.setColumnCount(0, sel_.size());
512 if (!fnFrac_.empty())
514 AnalysisDataPlotModulePointer plot(
515 new AnalysisDataPlotModule(settings.plotSettings()));
516 plot->setFileName(fnFrac_);
517 plot->setTitle("Covered fraction");
518 plot->setXAxisIsTime();
519 plot->setYLabel("Fraction");
520 plot->setYFormat(6, 4);
521 for (size_t g = 0; g < sel_.size(); ++g)
523 plot->appendLegend(sel_[g].name());
525 cdata_.addModule(plot);
528 // TODO: For large systems, a float may not have enough precision
529 if (!fnIndex_.empty())
531 AnalysisDataPlotModulePointer plot(
532 new AnalysisDataPlotModule(settings.plotSettings()));
533 plot->setFileName(fnIndex_);
534 plot->setPlainOutput(true);
535 plot->setYFormat(4, 0);
536 idata_.addModule(plot);
540 std::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
541 writer->setFileName(fnNdx_);
542 for (size_t g = 0; g < sel_.size(); ++g)
544 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
546 idata_.addModule(writer);
549 mdata_.setDataSetCount(sel_.size());
550 for (size_t g = 0; g < sel_.size(); ++g)
552 mdata_.setColumnCount(g, sel_[g].posCount());
554 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
555 if (!fnMask_.empty())
557 AnalysisDataPlotModulePointer plot(
558 new AnalysisDataPlotModule(settings.plotSettings()));
559 plot->setFileName(fnMask_);
560 plot->setTitle("Selection mask");
561 plot->setXAxisIsTime();
562 plot->setYLabel("Occupancy");
563 plot->setYFormat(1, 0);
564 // TODO: Add legend? (there can be massive amount of columns)
565 mdata_.addModule(plot);
567 if (!fnOccupancy_.empty())
569 AnalysisDataPlotModulePointer plot(
570 new AnalysisDataPlotModule(settings.plotSettings()));
571 plot->setFileName(fnOccupancy_);
572 plot->setTitle("Fraction of time selection matches");
573 plot->setXLabel("Selected position");
574 plot->setYLabel("Occupied fraction");
575 for (size_t g = 0; g < sel_.size(); ++g)
577 plot->appendLegend(sel_[g].name());
579 occupancyModule_->addModule(plot);
581 if (!fnLifetime_.empty())
583 AnalysisDataPlotModulePointer plot(
584 new AnalysisDataPlotModule(settings.plotSettings()));
585 plot->setFileName(fnLifetime_);
586 plot->setTitle("Lifetime histogram");
587 plot->setXAxisIsTime();
588 plot->setYLabel("Number of occurrences");
589 for (size_t g = 0; g < sel_.size(); ++g)
591 plot->appendLegend(sel_[g].name());
593 lifetimeModule_->addModule(plot);
601 Select::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc * /* pbc */,
602 TrajectoryAnalysisModuleData *pdata)
604 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
605 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
606 AnalysisDataHandle idh = pdata->dataHandle(idata_);
607 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
608 const SelectionList &sel = pdata->parallelSelections(sel_);
609 t_topology *top = top_->topology();
611 sdh.startFrame(frnr, fr.time);
612 for (size_t g = 0; g < sel.size(); ++g)
614 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
617 normfac /= totsize_[g];
619 sdh.setPoint(g, sel[g].posCount() * normfac);
623 cdh.startFrame(frnr, fr.time);
624 for (size_t g = 0; g < sel.size(); ++g)
626 cdh.setPoint(g, sel[g].coveredFraction());
630 idh.startFrame(frnr, fr.time);
631 for (size_t g = 0; g < sel.size(); ++g)
633 idh.setPoint(0, sel[g].posCount());
634 idh.finishPointSet();
635 for (int i = 0; i < sel[g].posCount(); ++i)
637 const SelectionPosition &p = sel[g].position(i);
638 if (sel[g].type() == INDEX_RES && !bResInd_)
640 idh.setPoint(1, top->atoms.resinfo[p.mappedId()].nr);
644 idh.setPoint(1, p.mappedId() + 1);
646 idh.finishPointSet();
651 mdh.startFrame(frnr, fr.time);
652 for (size_t g = 0; g < sel.size(); ++g)
654 mdh.selectDataSet(g);
655 for (int i = 0; i < totsize_[g]; ++i)
659 for (int i = 0; i < sel[g].posCount(); ++i)
661 mdh.setPoint(sel[g].position(i).refId(), 1);
669 Select::finishAnalysis(int /*nframes*/)
675 Select::writeOutput()
679 GMX_RELEASE_ASSERT(top_->hasTopology(),
680 "Topology should have been loaded or an error given earlier");
682 atoms = top_->topology()->atoms;
684 snew(pdbinfo, atoms.nr);
685 const sfree_guard pdbinfoGuard(pdbinfo);
686 if (atoms.havePdbInfo)
688 std::memcpy(pdbinfo, atoms.pdbinfo, atoms.nr*sizeof(*pdbinfo));
692 atoms.havePdbInfo = TRUE;
694 atoms.pdbinfo = pdbinfo;
695 for (int i = 0; i < atoms.nr; ++i)
697 pdbinfo[i].occup = 0.0;
699 for (size_t g = 0; g < sel_.size(); ++g)
701 for (int i = 0; i < sel_[g].posCount(); ++i)
703 ConstArrayRef<int> atomIndices
704 = sel_[g].position(i).atomIndices();
705 ConstArrayRef<int>::const_iterator ai;
706 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
708 pdbinfo[*ai].occup += occupancyModule_->average(g, i);
714 clear_trxframe(&fr, TRUE);
719 top_->getTopologyConf(&fr.x, fr.box);
723 case PdbAtomsSelection_All:
725 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
726 write_trxframe(status, &fr, NULL);
730 case PdbAtomsSelection_MaxSelection:
732 std::set<int> atomIndicesSet;
733 for (size_t g = 0; g < sel_.size(); ++g)
735 ConstArrayRef<int> atomIndices = sel_[g].atomIndices();
736 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
738 std::vector<int> allAtomIndices(atomIndicesSet.begin(),
739 atomIndicesSet.end());
740 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
741 write_trxframe_indexed(status, &fr, allAtomIndices.size(),
742 allAtomIndices.data(), NULL);
746 case PdbAtomsSelection_Selected:
748 std::vector<int> indices;
749 for (int i = 0; i < atoms.nr; ++i)
751 if (pdbinfo[i].occup > 0.0)
753 indices.push_back(i);
756 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
757 write_trxframe_indexed(status, &fr, indices.size(), indices.data(), NULL);
762 GMX_RELEASE_ASSERT(false,
763 "Mismatch between -pdbatoms enum values and implementation");
770 const char SelectInfo::name[] = "select";
771 const char SelectInfo::shortDescription[] =
772 "Print general information about selections";
774 TrajectoryAnalysisModulePointer SelectInfo::create()
776 return TrajectoryAnalysisModulePointer(new Select);
779 } // namespace analysismodules