2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements gmx::analysismodules::Select.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
52 #include "gromacs/analysisdata/analysisdata.h"
53 #include "gromacs/analysisdata/dataframe.h"
54 #include "gromacs/analysisdata/datamodule.h"
55 #include "gromacs/analysisdata/modules/average.h"
56 #include "gromacs/analysisdata/modules/lifetime.h"
57 #include "gromacs/analysisdata/modules/plot.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/trx.h"
60 #include "gromacs/fileio/trxio.h"
61 #include "gromacs/options/basicoptions.h"
62 #include "gromacs/options/filenameoption.h"
63 #include "gromacs/options/options.h"
64 #include "gromacs/selection/selection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/topology/topology.h"
67 #include "gromacs/trajectoryanalysis/analysissettings.h"
68 #include "gromacs/utility/arrayref.h"
69 #include "gromacs/utility/exceptions.h"
70 #include "gromacs/utility/gmxassert.h"
71 #include "gromacs/utility/scoped_ptr_sfree.h"
72 #include "gromacs/utility/smalloc.h"
73 #include "gromacs/utility/stringutil.h"
78 namespace analysismodules
85 * Data module for writing index files.
87 * \ingroup module_analysisdata
89 class IndexFileWriterModule : public AnalysisDataModuleSerial
92 IndexFileWriterModule();
93 virtual ~IndexFileWriterModule();
95 //! Sets the file name to write the index file to.
96 void setFileName(const std::string &fnm);
98 * Adds information about a group to be printed.
100 * Must be called for each group present in the input data.
102 void addGroup(const std::string &name, bool bDynamic);
104 virtual int flags() const;
106 virtual void dataStarted(AbstractAnalysisData *data);
107 virtual void frameStarted(const AnalysisDataFrameHeader &header);
108 virtual void pointsAdded(const AnalysisDataPointSetRef &points);
109 virtual void frameFinished(const AnalysisDataFrameHeader &header);
110 virtual void dataFinished();
117 GroupInfo(const std::string &name, bool bDynamic)
118 : name(name), bDynamic(bDynamic)
126 std::vector<GroupInfo> groups_;
133 /********************************************************************
134 * IndexFileWriterModule
137 IndexFileWriterModule::IndexFileWriterModule()
138 : fp_(NULL), currentGroup_(-1), currentSize_(0), bAnyWritten_(false)
143 IndexFileWriterModule::~IndexFileWriterModule()
149 void IndexFileWriterModule::closeFile()
159 void IndexFileWriterModule::setFileName(const std::string &fnm)
165 void IndexFileWriterModule::addGroup(const std::string &name, bool bDynamic)
167 std::string newName(name);
168 std::replace(newName.begin(), newName.end(), ' ', '_');
169 groups_.push_back(GroupInfo(newName, bDynamic));
173 int IndexFileWriterModule::flags() const
175 return efAllowMulticolumn | efAllowMultipoint;
179 void IndexFileWriterModule::dataStarted(AbstractAnalysisData * /*data*/)
183 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
188 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader & /*header*/)
190 bAnyWritten_ = false;
196 IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef &points)
202 bool bFirstFrame = (points.frameIndex() == 0);
203 if (points.firstColumn() == 0)
206 GMX_RELEASE_ASSERT(currentGroup_ < static_cast<int>(groups_.size()),
207 "Too few groups initialized");
208 if (bFirstFrame || groups_[currentGroup_].bDynamic)
210 if (!bFirstFrame || currentGroup_ > 0)
212 std::fprintf(fp_, "\n\n");
214 std::string name = groups_[currentGroup_].name;
215 if (groups_[currentGroup_].bDynamic)
217 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
219 std::fprintf(fp_, "[ %s ]", name.c_str());
226 if (bFirstFrame || groups_[currentGroup_].bDynamic)
228 if (currentSize_ % 15 == 0)
230 std::fprintf(fp_, "\n");
232 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
239 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader & /*header*/)
244 void IndexFileWriterModule::dataFinished()
248 std::fprintf(fp_, "\n");
253 /********************************************************************
257 class Select : public TrajectoryAnalysisModule
262 virtual void initOptions(Options *options,
263 TrajectoryAnalysisSettings *settings);
264 virtual void optionsFinished(Options *options,
265 TrajectoryAnalysisSettings *settings);
266 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
267 const TopologyInformation &top);
269 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
270 TrajectoryAnalysisModuleData *pdata);
272 virtual void finishAnalysis(int nframes);
273 virtual void writeOutput();
277 SelectionOptionInfo *selOpt_;
281 std::string fnIndex_;
284 std::string fnOccupancy_;
286 std::string fnLifetime_;
290 bool bCumulativeLifetimes_;
291 std::string resNumberType_;
292 std::string pdbAtoms_;
294 const TopologyInformation *top_;
295 std::vector<int> totsize_;
300 AnalysisDataAverageModulePointer occupancyModule_;
301 AnalysisDataLifetimeModulePointer lifetimeModule_;
305 : TrajectoryAnalysisModule(SelectInfo::name, SelectInfo::shortDescription),
307 bTotNorm_(false), bFracNorm_(false), bResInd_(false),
308 bCumulativeLifetimes_(true), top_(NULL),
309 occupancyModule_(new AnalysisDataAverageModule()),
310 lifetimeModule_(new AnalysisDataLifetimeModule())
312 mdata_.addModule(occupancyModule_);
313 mdata_.addModule(lifetimeModule_);
315 registerAnalysisDataset(&sdata_, "size");
316 registerAnalysisDataset(&cdata_, "cfrac");
317 idata_.setColumnCount(0, 2);
318 idata_.setMultipoint(true);
319 registerAnalysisDataset(&idata_, "index");
320 registerAnalysisDataset(&mdata_, "mask");
321 occupancyModule_->setXAxis(1.0, 1.0);
322 registerBasicDataset(occupancyModule_.get(), "occupancy");
323 registerBasicDataset(lifetimeModule_.get(), "lifetime");
328 Select::initOptions(Options *options, TrajectoryAnalysisSettings * /*settings*/)
330 static const char *const desc[] = {
331 "[THISMODULE] writes out basic data about dynamic selections.",
332 "It can be used for some simple analyses, or the output can",
333 "be combined with output from other programs and/or external",
334 "analysis programs to calculate more complex things.",
335 "Any combination of the output options is possible, but note",
336 "that [TT]-om[tt] only operates on the first selection.",
337 "Also note that if you provide no output options, no output is",
339 "With [TT]-os[tt], calculates the number of positions in each",
340 "selection for each frame. With [TT]-norm[tt], the output is",
341 "between 0 and 1 and describes the fraction from the maximum",
342 "number of positions (e.g., for selection 'resname RA and x < 5'",
343 "the maximum number of positions is the number of atoms in",
344 "RA residues). With [TT]-cfnorm[tt], the output is divided",
345 "by the fraction covered by the selection.",
346 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
347 "of one another.[PAR]",
348 "With [TT]-oc[tt], the fraction covered by each selection is",
349 "written out as a function of time.[PAR]",
350 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
351 "written out as a function of time. In the output, the first",
352 "column contains the frame time, the second contains the number",
353 "of positions, followed by the atom/residue/molecule numbers.",
354 "If more than one selection is specified, the size of the second",
355 "group immediately follows the last number of the first group",
357 "With [TT]-on[tt], the selected atoms are written as a index file",
358 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
359 "is written as a selection group and for dynamic selections a",
360 "group is written for each frame.[PAR]",
361 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
362 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
363 "numbers as they appear in the input file, while [TT]index[tt] prints",
364 "unique numbers assigned to the residues in the order they appear",
365 "in the input file, starting with 1. The former is more intuitive,",
366 "but if the input contains multiple residues with the same number,",
367 "the output can be less useful.[PAR]",
368 "With [TT]-om[tt], a mask is printed for the first selection",
369 "as a function of time. Each line in the output corresponds to",
370 "one frame, and contains either 0/1 for each atom/residue/molecule",
371 "possibly selected. 1 stands for the atom/residue/molecule being",
372 "selected for the current frame, 0 for not selected.[PAR]",
373 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
374 "the fraction of frames where the position is selected) is",
376 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
377 "column is filled with the occupancy fraction of each atom in the",
378 "selection. The coordinates in the PDB file will be those from the",
379 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
380 "appear in the output PDB file: with [TT]all[tt] all atoms are",
381 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
382 "selection are present, and with [TT]selected[tt] only atoms that are",
383 "selected at least in one frame are present.[PAR]",
384 "With [TT]-olt[tt], a histogram is produced that shows the number of",
385 "selected positions as a function of the time the position was",
386 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
387 "subintervals of longer intervals are included in the histogram.[PAR]",
388 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
389 "dynamic selections."
392 options->setDescription(desc);
394 options->addOption(FileNameOption("os").filetype(eftPlot).outputFile()
395 .store(&fnSize_).defaultBasename("size")
396 .description("Number of positions in each selection"));
397 options->addOption(FileNameOption("oc").filetype(eftPlot).outputFile()
398 .store(&fnFrac_).defaultBasename("cfrac")
399 .description("Covered fraction for each selection"));
400 options->addOption(FileNameOption("oi").filetype(eftGenericData).outputFile()
401 .store(&fnIndex_).defaultBasename("index")
402 .description("Indices selected by each selection"));
403 options->addOption(FileNameOption("on").filetype(eftIndex).outputFile()
404 .store(&fnNdx_).defaultBasename("index")
405 .description("Index file from the selection"));
406 options->addOption(FileNameOption("om").filetype(eftPlot).outputFile()
407 .store(&fnMask_).defaultBasename("mask")
408 .description("Mask for selected positions"));
409 options->addOption(FileNameOption("of").filetype(eftPlot).outputFile()
410 .store(&fnOccupancy_).defaultBasename("occupancy")
411 .description("Occupied fraction for selected positions"));
412 options->addOption(FileNameOption("ofpdb").filetype(eftPDB).outputFile()
413 .store(&fnPDB_).defaultBasename("occupancy")
414 .description("PDB file with occupied fraction for selected positions"));
415 options->addOption(FileNameOption("olt").filetype(eftPlot).outputFile()
416 .store(&fnLifetime_).defaultBasename("lifetime")
417 .description("Lifetime histogram"));
419 selOpt_ = options->addOption(SelectionOption("select").storeVector(&sel_)
420 .required().multiValue()
421 .description("Selections to analyze"));
423 options->addOption(BooleanOption("norm").store(&bTotNorm_)
424 .description("Normalize by total number of positions with -os"));
425 options->addOption(BooleanOption("cfnorm").store(&bFracNorm_)
426 .description("Normalize by covered fraction with -os"));
427 const char *const cResNumberEnum[] = { "number", "index" };
428 options->addOption(StringOption("resnr").store(&resNumberType_)
429 .enumValue(cResNumberEnum).defaultEnumIndex(0)
430 .description("Residue number output type with -oi and -on"));
431 const char *const cPDBAtomsEnum[] = { "all", "maxsel", "selected" };
432 options->addOption(StringOption("pdbatoms").store(&pdbAtoms_)
433 .enumValue(cPDBAtomsEnum).defaultEnumIndex(0)
434 .description("Atoms to write with -ofpdb"));
435 options->addOption(BooleanOption("cumlt").store(&bCumulativeLifetimes_)
436 .description("Cumulate subintervals of longer intervals in -olt"));
440 Select::optionsFinished(Options * /*options*/,
441 TrajectoryAnalysisSettings *settings)
445 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
446 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
451 Select::initAnalysis(const TrajectoryAnalysisSettings &settings,
452 const TopologyInformation &top)
454 bResInd_ = (resNumberType_ == "index");
456 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
458 i->initCoveredFraction(CFRAC_SOLIDANGLE);
461 // TODO: For large systems, a float may not have enough precision
462 sdata_.setColumnCount(0, sel_.size());
463 totsize_.reserve(sel_.size());
464 for (size_t g = 0; g < sel_.size(); ++g)
466 totsize_.push_back(sel_[g].posCount());
468 if (!fnSize_.empty())
470 AnalysisDataPlotModulePointer plot(
471 new AnalysisDataPlotModule(settings.plotSettings()));
472 plot->setFileName(fnSize_);
473 plot->setTitle("Selection size");
474 plot->setXAxisIsTime();
475 plot->setYLabel("Number");
476 for (size_t g = 0; g < sel_.size(); ++g)
478 plot->appendLegend(sel_[g].name());
480 sdata_.addModule(plot);
483 cdata_.setColumnCount(0, sel_.size());
484 if (!fnFrac_.empty())
486 AnalysisDataPlotModulePointer plot(
487 new AnalysisDataPlotModule(settings.plotSettings()));
488 plot->setFileName(fnFrac_);
489 plot->setTitle("Covered fraction");
490 plot->setXAxisIsTime();
491 plot->setYLabel("Fraction");
492 plot->setYFormat(6, 4);
493 for (size_t g = 0; g < sel_.size(); ++g)
495 plot->appendLegend(sel_[g].name());
497 cdata_.addModule(plot);
500 // TODO: For large systems, a float may not have enough precision
501 if (!fnIndex_.empty())
503 AnalysisDataPlotModulePointer plot(
504 new AnalysisDataPlotModule(settings.plotSettings()));
505 plot->setFileName(fnIndex_);
506 plot->setPlainOutput(true);
507 plot->setYFormat(4, 0);
508 idata_.addModule(plot);
512 boost::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
513 writer->setFileName(fnNdx_);
514 for (size_t g = 0; g < sel_.size(); ++g)
516 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
518 idata_.addModule(writer);
521 mdata_.setDataSetCount(sel_.size());
522 for (size_t g = 0; g < sel_.size(); ++g)
524 mdata_.setColumnCount(g, sel_[g].posCount());
526 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
527 if (!fnMask_.empty())
529 AnalysisDataPlotModulePointer plot(
530 new AnalysisDataPlotModule(settings.plotSettings()));
531 plot->setFileName(fnMask_);
532 plot->setTitle("Selection mask");
533 plot->setXAxisIsTime();
534 plot->setYLabel("Occupancy");
535 plot->setYFormat(1, 0);
536 // TODO: Add legend? (there can be massive amount of columns)
537 mdata_.addModule(plot);
539 if (!fnOccupancy_.empty())
541 AnalysisDataPlotModulePointer plot(
542 new AnalysisDataPlotModule(settings.plotSettings()));
543 plot->setFileName(fnOccupancy_);
544 plot->setTitle("Fraction of time selection matches");
545 plot->setXLabel("Selected position");
546 plot->setYLabel("Occupied fraction");
547 for (size_t g = 0; g < sel_.size(); ++g)
549 plot->appendLegend(sel_[g].name());
551 occupancyModule_->addModule(plot);
553 if (!fnLifetime_.empty())
555 AnalysisDataPlotModulePointer plot(
556 new AnalysisDataPlotModule(settings.plotSettings()));
557 plot->setFileName(fnLifetime_);
558 plot->setTitle("Lifetime histogram");
559 plot->setXAxisIsTime();
560 plot->setYLabel("Number of occurrences");
561 for (size_t g = 0; g < sel_.size(); ++g)
563 plot->appendLegend(sel_[g].name());
565 lifetimeModule_->addModule(plot);
573 Select::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc * /* pbc */,
574 TrajectoryAnalysisModuleData *pdata)
576 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
577 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
578 AnalysisDataHandle idh = pdata->dataHandle(idata_);
579 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
580 const SelectionList &sel = pdata->parallelSelections(sel_);
581 t_topology *top = top_->topology();
583 sdh.startFrame(frnr, fr.time);
584 for (size_t g = 0; g < sel.size(); ++g)
586 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
589 normfac /= totsize_[g];
591 sdh.setPoint(g, sel[g].posCount() * normfac);
595 cdh.startFrame(frnr, fr.time);
596 for (size_t g = 0; g < sel.size(); ++g)
598 cdh.setPoint(g, sel[g].coveredFraction());
602 idh.startFrame(frnr, fr.time);
603 for (size_t g = 0; g < sel.size(); ++g)
605 idh.setPoint(0, sel[g].posCount());
606 idh.finishPointSet();
607 for (int i = 0; i < sel[g].posCount(); ++i)
609 const SelectionPosition &p = sel[g].position(i);
610 if (sel[g].type() == INDEX_RES && !bResInd_)
612 idh.setPoint(1, top->atoms.resinfo[p.mappedId()].nr);
616 idh.setPoint(1, p.mappedId() + 1);
618 idh.finishPointSet();
623 mdh.startFrame(frnr, fr.time);
624 for (size_t g = 0; g < sel.size(); ++g)
626 mdh.selectDataSet(g);
627 for (int i = 0; i < totsize_[g]; ++i)
631 for (int i = 0; i < sel[g].posCount(); ++i)
633 mdh.setPoint(sel[g].position(i).refId(), 1);
641 Select::finishAnalysis(int /*nframes*/)
647 Select::writeOutput()
651 GMX_RELEASE_ASSERT(top_->hasTopology(),
652 "Topology should have been loaded or an error given earlier");
654 atoms = top_->topology()->atoms;
656 snew(pdbinfo, atoms.nr);
657 scoped_ptr_sfree pdbinfoGuard(pdbinfo);
658 if (atoms.pdbinfo != NULL)
660 std::memcpy(pdbinfo, atoms.pdbinfo, atoms.nr*sizeof(*pdbinfo));
662 atoms.pdbinfo = pdbinfo;
663 for (int i = 0; i < atoms.nr; ++i)
665 pdbinfo[i].occup = 0.0;
667 for (size_t g = 0; g < sel_.size(); ++g)
669 for (int i = 0; i < sel_[g].posCount(); ++i)
671 ConstArrayRef<int> atomIndices
672 = sel_[g].position(i).atomIndices();
673 ConstArrayRef<int>::const_iterator ai;
674 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
676 pdbinfo[*ai].occup += occupancyModule_->average(g, i);
682 clear_trxframe(&fr, TRUE);
687 top_->getTopologyConf(&fr.x, fr.box);
689 if (pdbAtoms_ == "all")
691 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
692 write_trxframe(status, &fr, NULL);
695 else if (pdbAtoms_ == "maxsel")
697 std::set<int> atomIndicesSet;
698 for (size_t g = 0; g < sel_.size(); ++g)
700 ConstArrayRef<int> atomIndices = sel_[g].atomIndices();
701 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
703 std::vector<int> allAtomIndices(atomIndicesSet.begin(),
704 atomIndicesSet.end());
705 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
706 write_trxframe_indexed(status, &fr, allAtomIndices.size(),
707 &allAtomIndices[0], NULL);
710 else if (pdbAtoms_ == "selected")
712 std::vector<int> indices;
713 for (int i = 0; i < atoms.nr; ++i)
715 if (pdbinfo[i].occup > 0.0)
717 indices.push_back(i);
720 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
721 write_trxframe_indexed(status, &fr, indices.size(), &indices[0], NULL);
726 GMX_RELEASE_ASSERT(false,
727 "Mismatch between -pdbatoms enum values and implementation");
734 const char SelectInfo::name[] = "select";
735 const char SelectInfo::shortDescription[] =
736 "Print general information about selections";
738 TrajectoryAnalysisModulePointer SelectInfo::create()
740 return TrajectoryAnalysisModulePointer(new Select);
743 } // namespace analysismodules