2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements gmx::analysismodules::Select.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
54 #include "gromacs/analysisdata/analysisdata.h"
55 #include "gromacs/analysisdata/dataframe.h"
56 #include "gromacs/analysisdata/datamodule.h"
57 #include "gromacs/analysisdata/modules/average.h"
58 #include "gromacs/analysisdata/modules/lifetime.h"
59 #include "gromacs/analysisdata/modules/plot.h"
60 #include "gromacs/fileio/gmxfio.h"
61 #include "gromacs/fileio/trx.h"
62 #include "gromacs/fileio/trxio.h"
63 #include "gromacs/options/basicoptions.h"
64 #include "gromacs/options/filenameoption.h"
65 #include "gromacs/options/options.h"
66 #include "gromacs/selection/selection.h"
67 #include "gromacs/selection/selectionoption.h"
68 #include "gromacs/topology/topology.h"
69 #include "gromacs/trajectoryanalysis/analysissettings.h"
70 #include "gromacs/utility/arrayref.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/gmxassert.h"
73 #include "gromacs/utility/scoped_cptr.h"
74 #include "gromacs/utility/smalloc.h"
75 #include "gromacs/utility/stringutil.h"
80 namespace analysismodules
87 * Data module for writing index files.
89 * \ingroup module_analysisdata
91 class IndexFileWriterModule : public AnalysisDataModuleSerial
94 IndexFileWriterModule();
95 virtual ~IndexFileWriterModule();
97 //! Sets the file name to write the index file to.
98 void setFileName(const std::string &fnm);
100 * Adds information about a group to be printed.
102 * Must be called for each group present in the input data.
104 void addGroup(const std::string &name, bool bDynamic);
106 virtual int flags() const;
108 virtual void dataStarted(AbstractAnalysisData *data);
109 virtual void frameStarted(const AnalysisDataFrameHeader &header);
110 virtual void pointsAdded(const AnalysisDataPointSetRef &points);
111 virtual void frameFinished(const AnalysisDataFrameHeader &header);
112 virtual void dataFinished();
119 GroupInfo(const std::string &name, bool bDynamic)
120 : name(name), bDynamic(bDynamic)
128 std::vector<GroupInfo> groups_;
135 /********************************************************************
136 * IndexFileWriterModule
139 IndexFileWriterModule::IndexFileWriterModule()
140 : fp_(NULL), currentGroup_(-1), currentSize_(0), bAnyWritten_(false)
145 IndexFileWriterModule::~IndexFileWriterModule()
151 void IndexFileWriterModule::closeFile()
161 void IndexFileWriterModule::setFileName(const std::string &fnm)
167 void IndexFileWriterModule::addGroup(const std::string &name, bool bDynamic)
169 std::string newName(name);
170 std::replace(newName.begin(), newName.end(), ' ', '_');
171 groups_.push_back(GroupInfo(newName, bDynamic));
175 int IndexFileWriterModule::flags() const
177 return efAllowMulticolumn | efAllowMultipoint;
181 void IndexFileWriterModule::dataStarted(AbstractAnalysisData * /*data*/)
185 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
190 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader & /*header*/)
192 bAnyWritten_ = false;
198 IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef &points)
204 bool bFirstFrame = (points.frameIndex() == 0);
205 if (points.firstColumn() == 0)
208 GMX_RELEASE_ASSERT(currentGroup_ < static_cast<int>(groups_.size()),
209 "Too few groups initialized");
210 if (bFirstFrame || groups_[currentGroup_].bDynamic)
212 if (!bFirstFrame || currentGroup_ > 0)
214 std::fprintf(fp_, "\n\n");
216 std::string name = groups_[currentGroup_].name;
217 if (groups_[currentGroup_].bDynamic)
219 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
221 std::fprintf(fp_, "[ %s ]", name.c_str());
228 if (bFirstFrame || groups_[currentGroup_].bDynamic)
230 if (currentSize_ % 15 == 0)
232 std::fprintf(fp_, "\n");
234 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
241 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader & /*header*/)
246 void IndexFileWriterModule::dataFinished()
250 std::fprintf(fp_, "\n");
255 /********************************************************************
259 class Select : public TrajectoryAnalysisModule
264 virtual void initOptions(Options *options,
265 TrajectoryAnalysisSettings *settings);
266 virtual void optionsFinished(Options *options,
267 TrajectoryAnalysisSettings *settings);
268 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
269 const TopologyInformation &top);
271 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
272 TrajectoryAnalysisModuleData *pdata);
274 virtual void finishAnalysis(int nframes);
275 virtual void writeOutput();
279 SelectionOptionInfo *selOpt_;
283 std::string fnIndex_;
286 std::string fnOccupancy_;
288 std::string fnLifetime_;
292 bool bCumulativeLifetimes_;
293 std::string resNumberType_;
294 std::string pdbAtoms_;
296 const TopologyInformation *top_;
297 std::vector<int> totsize_;
302 AnalysisDataAverageModulePointer occupancyModule_;
303 AnalysisDataLifetimeModulePointer lifetimeModule_;
307 : TrajectoryAnalysisModule(SelectInfo::name, SelectInfo::shortDescription),
309 bTotNorm_(false), bFracNorm_(false), bResInd_(false),
310 bCumulativeLifetimes_(true), top_(NULL),
311 occupancyModule_(new AnalysisDataAverageModule()),
312 lifetimeModule_(new AnalysisDataLifetimeModule())
314 mdata_.addModule(occupancyModule_);
315 mdata_.addModule(lifetimeModule_);
317 registerAnalysisDataset(&sdata_, "size");
318 registerAnalysisDataset(&cdata_, "cfrac");
319 idata_.setColumnCount(0, 2);
320 idata_.setMultipoint(true);
321 registerAnalysisDataset(&idata_, "index");
322 registerAnalysisDataset(&mdata_, "mask");
323 occupancyModule_->setXAxis(1.0, 1.0);
324 registerBasicDataset(occupancyModule_.get(), "occupancy");
325 registerBasicDataset(lifetimeModule_.get(), "lifetime");
330 Select::initOptions(Options *options, TrajectoryAnalysisSettings * /*settings*/)
332 static const char *const desc[] = {
333 "[THISMODULE] writes out basic data about dynamic selections.",
334 "It can be used for some simple analyses, or the output can",
335 "be combined with output from other programs and/or external",
336 "analysis programs to calculate more complex things.",
337 "Any combination of the output options is possible, but note",
338 "that [TT]-om[tt] only operates on the first selection.",
339 "Also note that if you provide no output options, no output is",
341 "With [TT]-os[tt], calculates the number of positions in each",
342 "selection for each frame. With [TT]-norm[tt], the output is",
343 "between 0 and 1 and describes the fraction from the maximum",
344 "number of positions (e.g., for selection 'resname RA and x < 5'",
345 "the maximum number of positions is the number of atoms in",
346 "RA residues). With [TT]-cfnorm[tt], the output is divided",
347 "by the fraction covered by the selection.",
348 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
349 "of one another.[PAR]",
350 "With [TT]-oc[tt], the fraction covered by each selection is",
351 "written out as a function of time.[PAR]",
352 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
353 "written out as a function of time. In the output, the first",
354 "column contains the frame time, the second contains the number",
355 "of positions, followed by the atom/residue/molecule numbers.",
356 "If more than one selection is specified, the size of the second",
357 "group immediately follows the last number of the first group",
359 "With [TT]-on[tt], the selected atoms are written as a index file",
360 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
361 "is written as a selection group and for dynamic selections a",
362 "group is written for each frame.[PAR]",
363 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
364 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
365 "numbers as they appear in the input file, while [TT]index[tt] prints",
366 "unique numbers assigned to the residues in the order they appear",
367 "in the input file, starting with 1. The former is more intuitive,",
368 "but if the input contains multiple residues with the same number,",
369 "the output can be less useful.[PAR]",
370 "With [TT]-om[tt], a mask is printed for the first selection",
371 "as a function of time. Each line in the output corresponds to",
372 "one frame, and contains either 0/1 for each atom/residue/molecule",
373 "possibly selected. 1 stands for the atom/residue/molecule being",
374 "selected for the current frame, 0 for not selected.[PAR]",
375 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
376 "the fraction of frames where the position is selected) is",
378 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
379 "column is filled with the occupancy fraction of each atom in the",
380 "selection. The coordinates in the PDB file will be those from the",
381 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
382 "appear in the output PDB file: with [TT]all[tt] all atoms are",
383 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
384 "selection are present, and with [TT]selected[tt] only atoms that are",
385 "selected at least in one frame are present.[PAR]",
386 "With [TT]-olt[tt], a histogram is produced that shows the number of",
387 "selected positions as a function of the time the position was",
388 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
389 "subintervals of longer intervals are included in the histogram.[PAR]",
390 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
391 "dynamic selections."
394 options->setDescription(desc);
396 options->addOption(FileNameOption("os").filetype(eftPlot).outputFile()
397 .store(&fnSize_).defaultBasename("size")
398 .description("Number of positions in each selection"));
399 options->addOption(FileNameOption("oc").filetype(eftPlot).outputFile()
400 .store(&fnFrac_).defaultBasename("cfrac")
401 .description("Covered fraction for each selection"));
402 options->addOption(FileNameOption("oi").filetype(eftGenericData).outputFile()
403 .store(&fnIndex_).defaultBasename("index")
404 .description("Indices selected by each selection"));
405 options->addOption(FileNameOption("on").filetype(eftIndex).outputFile()
406 .store(&fnNdx_).defaultBasename("index")
407 .description("Index file from the selection"));
408 options->addOption(FileNameOption("om").filetype(eftPlot).outputFile()
409 .store(&fnMask_).defaultBasename("mask")
410 .description("Mask for selected positions"));
411 options->addOption(FileNameOption("of").filetype(eftPlot).outputFile()
412 .store(&fnOccupancy_).defaultBasename("occupancy")
413 .description("Occupied fraction for selected positions"));
414 options->addOption(FileNameOption("ofpdb").filetype(eftPDB).outputFile()
415 .store(&fnPDB_).defaultBasename("occupancy")
416 .description("PDB file with occupied fraction for selected positions"));
417 options->addOption(FileNameOption("olt").filetype(eftPlot).outputFile()
418 .store(&fnLifetime_).defaultBasename("lifetime")
419 .description("Lifetime histogram"));
421 selOpt_ = options->addOption(SelectionOption("select").storeVector(&sel_)
422 .required().multiValue()
423 .description("Selections to analyze"));
425 options->addOption(BooleanOption("norm").store(&bTotNorm_)
426 .description("Normalize by total number of positions with -os"));
427 options->addOption(BooleanOption("cfnorm").store(&bFracNorm_)
428 .description("Normalize by covered fraction with -os"));
429 const char *const cResNumberEnum[] = { "number", "index" };
430 options->addOption(StringOption("resnr").store(&resNumberType_)
431 .enumValue(cResNumberEnum).defaultEnumIndex(0)
432 .description("Residue number output type with -oi and -on"));
433 const char *const cPDBAtomsEnum[] = { "all", "maxsel", "selected" };
434 options->addOption(StringOption("pdbatoms").store(&pdbAtoms_)
435 .enumValue(cPDBAtomsEnum).defaultEnumIndex(0)
436 .description("Atoms to write with -ofpdb"));
437 options->addOption(BooleanOption("cumlt").store(&bCumulativeLifetimes_)
438 .description("Cumulate subintervals of longer intervals in -olt"));
442 Select::optionsFinished(Options * /*options*/,
443 TrajectoryAnalysisSettings *settings)
447 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
448 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
453 Select::initAnalysis(const TrajectoryAnalysisSettings &settings,
454 const TopologyInformation &top)
456 bResInd_ = (resNumberType_ == "index");
458 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
460 i->initCoveredFraction(CFRAC_SOLIDANGLE);
463 // TODO: For large systems, a float may not have enough precision
464 sdata_.setColumnCount(0, sel_.size());
465 totsize_.reserve(sel_.size());
466 for (size_t g = 0; g < sel_.size(); ++g)
468 totsize_.push_back(sel_[g].posCount());
470 if (!fnSize_.empty())
472 AnalysisDataPlotModulePointer plot(
473 new AnalysisDataPlotModule(settings.plotSettings()));
474 plot->setFileName(fnSize_);
475 plot->setTitle("Selection size");
476 plot->setXAxisIsTime();
477 plot->setYLabel("Number");
478 for (size_t g = 0; g < sel_.size(); ++g)
480 plot->appendLegend(sel_[g].name());
482 sdata_.addModule(plot);
485 cdata_.setColumnCount(0, sel_.size());
486 if (!fnFrac_.empty())
488 AnalysisDataPlotModulePointer plot(
489 new AnalysisDataPlotModule(settings.plotSettings()));
490 plot->setFileName(fnFrac_);
491 plot->setTitle("Covered fraction");
492 plot->setXAxisIsTime();
493 plot->setYLabel("Fraction");
494 plot->setYFormat(6, 4);
495 for (size_t g = 0; g < sel_.size(); ++g)
497 plot->appendLegend(sel_[g].name());
499 cdata_.addModule(plot);
502 // TODO: For large systems, a float may not have enough precision
503 if (!fnIndex_.empty())
505 AnalysisDataPlotModulePointer plot(
506 new AnalysisDataPlotModule(settings.plotSettings()));
507 plot->setFileName(fnIndex_);
508 plot->setPlainOutput(true);
509 plot->setYFormat(4, 0);
510 idata_.addModule(plot);
514 boost::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
515 writer->setFileName(fnNdx_);
516 for (size_t g = 0; g < sel_.size(); ++g)
518 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
520 idata_.addModule(writer);
523 mdata_.setDataSetCount(sel_.size());
524 for (size_t g = 0; g < sel_.size(); ++g)
526 mdata_.setColumnCount(g, sel_[g].posCount());
528 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
529 if (!fnMask_.empty())
531 AnalysisDataPlotModulePointer plot(
532 new AnalysisDataPlotModule(settings.plotSettings()));
533 plot->setFileName(fnMask_);
534 plot->setTitle("Selection mask");
535 plot->setXAxisIsTime();
536 plot->setYLabel("Occupancy");
537 plot->setYFormat(1, 0);
538 // TODO: Add legend? (there can be massive amount of columns)
539 mdata_.addModule(plot);
541 if (!fnOccupancy_.empty())
543 AnalysisDataPlotModulePointer plot(
544 new AnalysisDataPlotModule(settings.plotSettings()));
545 plot->setFileName(fnOccupancy_);
546 plot->setTitle("Fraction of time selection matches");
547 plot->setXLabel("Selected position");
548 plot->setYLabel("Occupied fraction");
549 for (size_t g = 0; g < sel_.size(); ++g)
551 plot->appendLegend(sel_[g].name());
553 occupancyModule_->addModule(plot);
555 if (!fnLifetime_.empty())
557 AnalysisDataPlotModulePointer plot(
558 new AnalysisDataPlotModule(settings.plotSettings()));
559 plot->setFileName(fnLifetime_);
560 plot->setTitle("Lifetime histogram");
561 plot->setXAxisIsTime();
562 plot->setYLabel("Number of occurrences");
563 for (size_t g = 0; g < sel_.size(); ++g)
565 plot->appendLegend(sel_[g].name());
567 lifetimeModule_->addModule(plot);
575 Select::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc * /* pbc */,
576 TrajectoryAnalysisModuleData *pdata)
578 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
579 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
580 AnalysisDataHandle idh = pdata->dataHandle(idata_);
581 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
582 const SelectionList &sel = pdata->parallelSelections(sel_);
583 t_topology *top = top_->topology();
585 sdh.startFrame(frnr, fr.time);
586 for (size_t g = 0; g < sel.size(); ++g)
588 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
591 normfac /= totsize_[g];
593 sdh.setPoint(g, sel[g].posCount() * normfac);
597 cdh.startFrame(frnr, fr.time);
598 for (size_t g = 0; g < sel.size(); ++g)
600 cdh.setPoint(g, sel[g].coveredFraction());
604 idh.startFrame(frnr, fr.time);
605 for (size_t g = 0; g < sel.size(); ++g)
607 idh.setPoint(0, sel[g].posCount());
608 idh.finishPointSet();
609 for (int i = 0; i < sel[g].posCount(); ++i)
611 const SelectionPosition &p = sel[g].position(i);
612 if (sel[g].type() == INDEX_RES && !bResInd_)
614 idh.setPoint(1, top->atoms.resinfo[p.mappedId()].nr);
618 idh.setPoint(1, p.mappedId() + 1);
620 idh.finishPointSet();
625 mdh.startFrame(frnr, fr.time);
626 for (size_t g = 0; g < sel.size(); ++g)
628 mdh.selectDataSet(g);
629 for (int i = 0; i < totsize_[g]; ++i)
633 for (int i = 0; i < sel[g].posCount(); ++i)
635 mdh.setPoint(sel[g].position(i).refId(), 1);
643 Select::finishAnalysis(int /*nframes*/)
649 Select::writeOutput()
653 GMX_RELEASE_ASSERT(top_->hasTopology(),
654 "Topology should have been loaded or an error given earlier");
656 atoms = top_->topology()->atoms;
658 snew(pdbinfo, atoms.nr);
659 scoped_guard_sfree pdbinfoGuard(pdbinfo);
660 if (atoms.pdbinfo != NULL)
662 std::memcpy(pdbinfo, atoms.pdbinfo, atoms.nr*sizeof(*pdbinfo));
664 atoms.pdbinfo = pdbinfo;
665 for (int i = 0; i < atoms.nr; ++i)
667 pdbinfo[i].occup = 0.0;
669 for (size_t g = 0; g < sel_.size(); ++g)
671 for (int i = 0; i < sel_[g].posCount(); ++i)
673 ConstArrayRef<int> atomIndices
674 = sel_[g].position(i).atomIndices();
675 ConstArrayRef<int>::const_iterator ai;
676 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
678 pdbinfo[*ai].occup += occupancyModule_->average(g, i);
684 clear_trxframe(&fr, TRUE);
689 top_->getTopologyConf(&fr.x, fr.box);
691 if (pdbAtoms_ == "all")
693 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
694 write_trxframe(status, &fr, NULL);
697 else if (pdbAtoms_ == "maxsel")
699 std::set<int> atomIndicesSet;
700 for (size_t g = 0; g < sel_.size(); ++g)
702 ConstArrayRef<int> atomIndices = sel_[g].atomIndices();
703 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
705 std::vector<int> allAtomIndices(atomIndicesSet.begin(),
706 atomIndicesSet.end());
707 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
708 write_trxframe_indexed(status, &fr, allAtomIndices.size(),
709 &allAtomIndices[0], NULL);
712 else if (pdbAtoms_ == "selected")
714 std::vector<int> indices;
715 for (int i = 0; i < atoms.nr; ++i)
717 if (pdbinfo[i].occup > 0.0)
719 indices.push_back(i);
722 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
723 write_trxframe_indexed(status, &fr, indices.size(), &indices[0], NULL);
728 GMX_RELEASE_ASSERT(false,
729 "Mismatch between -pdbatoms enum values and implementation");
736 const char SelectInfo::name[] = "select";
737 const char SelectInfo::shortDescription[] =
738 "Print general information about selections";
740 TrajectoryAnalysisModulePointer SelectInfo::create()
742 return TrajectoryAnalysisModulePointer(new Select);
745 } // namespace analysismodules