2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2009,2010,2011,2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements gmx::analysismodules::Select.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
52 #include "gromacs/analysisdata/analysisdata.h"
53 #include "gromacs/analysisdata/dataframe.h"
54 #include "gromacs/analysisdata/datamodule.h"
55 #include "gromacs/analysisdata/modules/average.h"
56 #include "gromacs/analysisdata/modules/lifetime.h"
57 #include "gromacs/analysisdata/modules/plot.h"
58 #include "gromacs/fileio/gmxfio.h"
59 #include "gromacs/fileio/trxio.h"
60 #include "gromacs/options/basicoptions.h"
61 #include "gromacs/options/filenameoption.h"
62 #include "gromacs/options/options.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectionoption.h"
65 #include "gromacs/trajectoryanalysis/analysissettings.h"
66 #include "gromacs/utility/arrayref.h"
67 #include "gromacs/utility/exceptions.h"
68 #include "gromacs/utility/gmxassert.h"
69 #include "gromacs/utility/scoped_ptr_sfree.h"
70 #include "gromacs/utility/smalloc.h"
71 #include "gromacs/utility/stringutil.h"
76 namespace analysismodules
83 * Data module for writing index files.
85 * \ingroup module_analysisdata
87 class IndexFileWriterModule : public AnalysisDataModuleSerial
90 IndexFileWriterModule();
91 virtual ~IndexFileWriterModule();
93 //! Sets the file name to write the index file to.
94 void setFileName(const std::string &fnm);
96 * Adds information about a group to be printed.
98 * Must be called for each group present in the input data.
100 void addGroup(const std::string &name, bool bDynamic);
102 virtual int flags() const;
104 virtual void dataStarted(AbstractAnalysisData *data);
105 virtual void frameStarted(const AnalysisDataFrameHeader &header);
106 virtual void pointsAdded(const AnalysisDataPointSetRef &points);
107 virtual void frameFinished(const AnalysisDataFrameHeader &header);
108 virtual void dataFinished();
115 GroupInfo(const std::string &name, bool bDynamic)
116 : name(name), bDynamic(bDynamic)
124 std::vector<GroupInfo> groups_;
131 /********************************************************************
132 * IndexFileWriterModule
135 IndexFileWriterModule::IndexFileWriterModule()
136 : fp_(NULL), currentGroup_(-1), currentSize_(0), bAnyWritten_(false)
141 IndexFileWriterModule::~IndexFileWriterModule()
147 void IndexFileWriterModule::closeFile()
157 void IndexFileWriterModule::setFileName(const std::string &fnm)
163 void IndexFileWriterModule::addGroup(const std::string &name, bool bDynamic)
165 std::string newName(name);
166 std::replace(newName.begin(), newName.end(), ' ', '_');
167 groups_.push_back(GroupInfo(newName, bDynamic));
171 int IndexFileWriterModule::flags() const
173 return efAllowMulticolumn | efAllowMultipoint;
177 void IndexFileWriterModule::dataStarted(AbstractAnalysisData * /*data*/)
181 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
186 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader & /*header*/)
188 bAnyWritten_ = false;
194 IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef &points)
200 bool bFirstFrame = (points.frameIndex() == 0);
201 if (points.firstColumn() == 0)
204 GMX_RELEASE_ASSERT(currentGroup_ < static_cast<int>(groups_.size()),
205 "Too few groups initialized");
206 if (bFirstFrame || groups_[currentGroup_].bDynamic)
208 if (!bFirstFrame || currentGroup_ > 0)
210 std::fprintf(fp_, "\n\n");
212 std::string name = groups_[currentGroup_].name;
213 if (groups_[currentGroup_].bDynamic)
215 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
217 std::fprintf(fp_, "[ %s ]", name.c_str());
224 if (bFirstFrame || groups_[currentGroup_].bDynamic)
226 if (currentSize_ % 15 == 0)
228 std::fprintf(fp_, "\n");
230 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
237 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader & /*header*/)
242 void IndexFileWriterModule::dataFinished()
246 std::fprintf(fp_, "\n");
251 /********************************************************************
255 class Select : public TrajectoryAnalysisModule
260 virtual void initOptions(Options *options,
261 TrajectoryAnalysisSettings *settings);
262 virtual void optionsFinished(Options *options,
263 TrajectoryAnalysisSettings *settings);
264 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
265 const TopologyInformation &top);
267 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
268 TrajectoryAnalysisModuleData *pdata);
270 virtual void finishAnalysis(int nframes);
271 virtual void writeOutput();
275 SelectionOptionInfo *selOpt_;
279 std::string fnIndex_;
282 std::string fnOccupancy_;
284 std::string fnLifetime_;
288 bool bCumulativeLifetimes_;
289 std::string resNumberType_;
290 std::string pdbAtoms_;
292 const TopologyInformation *top_;
293 std::vector<int> totsize_;
298 AnalysisDataAverageModulePointer occupancyModule_;
299 AnalysisDataLifetimeModulePointer lifetimeModule_;
303 : TrajectoryAnalysisModule(SelectInfo::name, SelectInfo::shortDescription),
305 bTotNorm_(false), bFracNorm_(false), bResInd_(false),
306 bCumulativeLifetimes_(true), top_(NULL),
307 occupancyModule_(new AnalysisDataAverageModule()),
308 lifetimeModule_(new AnalysisDataLifetimeModule())
310 mdata_.addModule(occupancyModule_);
311 mdata_.addModule(lifetimeModule_);
313 registerAnalysisDataset(&sdata_, "size");
314 registerAnalysisDataset(&cdata_, "cfrac");
315 idata_.setColumnCount(0, 2);
316 idata_.setMultipoint(true);
317 registerAnalysisDataset(&idata_, "index");
318 registerAnalysisDataset(&mdata_, "mask");
319 occupancyModule_->setXAxis(1.0, 1.0);
320 registerBasicDataset(occupancyModule_.get(), "occupancy");
321 registerBasicDataset(lifetimeModule_.get(), "lifetime");
326 Select::initOptions(Options *options, TrajectoryAnalysisSettings * /*settings*/)
328 static const char *const desc[] = {
329 "[THISMODULE] writes out basic data about dynamic selections.",
330 "It can be used for some simple analyses, or the output can",
331 "be combined with output from other programs and/or external",
332 "analysis programs to calculate more complex things.",
333 "Any combination of the output options is possible, but note",
334 "that [TT]-om[tt] only operates on the first selection.",
335 "Also note that if you provide no output options, no output is",
337 "With [TT]-os[tt], calculates the number of positions in each",
338 "selection for each frame. With [TT]-norm[tt], the output is",
339 "between 0 and 1 and describes the fraction from the maximum",
340 "number of positions (e.g., for selection 'resname RA and x < 5'",
341 "the maximum number of positions is the number of atoms in",
342 "RA residues). With [TT]-cfnorm[tt], the output is divided",
343 "by the fraction covered by the selection.",
344 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
345 "of one another.[PAR]",
346 "With [TT]-oc[tt], the fraction covered by each selection is",
347 "written out as a function of time.[PAR]",
348 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
349 "written out as a function of time. In the output, the first",
350 "column contains the frame time, the second contains the number",
351 "of positions, followed by the atom/residue/molecule numbers.",
352 "If more than one selection is specified, the size of the second",
353 "group immediately follows the last number of the first group",
355 "With [TT]-on[tt], the selected atoms are written as a index file",
356 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
357 "is written as a selection group and for dynamic selections a",
358 "group is written for each frame.[PAR]",
359 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
360 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
361 "numbers as they appear in the input file, while [TT]index[tt] prints",
362 "unique numbers assigned to the residues in the order they appear",
363 "in the input file, starting with 1. The former is more intuitive,",
364 "but if the input contains multiple residues with the same number,",
365 "the output can be less useful.[PAR]",
366 "With [TT]-om[tt], a mask is printed for the first selection",
367 "as a function of time. Each line in the output corresponds to",
368 "one frame, and contains either 0/1 for each atom/residue/molecule",
369 "possibly selected. 1 stands for the atom/residue/molecule being",
370 "selected for the current frame, 0 for not selected.[PAR]",
371 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
372 "the fraction of frames where the position is selected) is",
374 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
375 "column is filled with the occupancy fraction of each atom in the",
376 "selection. The coordinates in the PDB file will be those from the",
377 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
378 "appear in the output PDB file: with [TT]all[tt] all atoms are",
379 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
380 "selection are present, and with [TT]selected[tt] only atoms that are",
381 "selected at least in one frame are present.[PAR]",
382 "With [TT]-olt[tt], a histogram is produced that shows the number of",
383 "selected positions as a function of the time the position was",
384 "continuously selected. [TT]-cumlt[tt] can be used to control whether",
385 "subintervals of longer intervals are included in the histogram.[PAR]",
386 "[TT]-om[tt], [TT]-of[tt], and [TT]-olt[tt] only make sense with",
387 "dynamic selections."
390 options->setDescription(desc);
392 options->addOption(FileNameOption("os").filetype(eftPlot).outputFile()
393 .store(&fnSize_).defaultBasename("size")
394 .description("Number of positions in each selection"));
395 options->addOption(FileNameOption("oc").filetype(eftPlot).outputFile()
396 .store(&fnFrac_).defaultBasename("cfrac")
397 .description("Covered fraction for each selection"));
398 options->addOption(FileNameOption("oi").filetype(eftGenericData).outputFile()
399 .store(&fnIndex_).defaultBasename("index")
400 .description("Indices selected by each selection"));
401 options->addOption(FileNameOption("on").filetype(eftIndex).outputFile()
402 .store(&fnNdx_).defaultBasename("index")
403 .description("Index file from the selection"));
404 options->addOption(FileNameOption("om").filetype(eftPlot).outputFile()
405 .store(&fnMask_).defaultBasename("mask")
406 .description("Mask for selected positions"));
407 options->addOption(FileNameOption("of").filetype(eftPlot).outputFile()
408 .store(&fnOccupancy_).defaultBasename("occupancy")
409 .description("Occupied fraction for selected positions"));
410 options->addOption(FileNameOption("ofpdb").filetype(eftPDB).outputFile()
411 .store(&fnPDB_).defaultBasename("occupancy")
412 .description("PDB file with occupied fraction for selected positions"));
413 options->addOption(FileNameOption("olt").filetype(eftPlot).outputFile()
414 .store(&fnLifetime_).defaultBasename("lifetime")
415 .description("Lifetime histogram"));
417 selOpt_ = options->addOption(SelectionOption("select").storeVector(&sel_)
418 .required().multiValue()
419 .description("Selections to analyze"));
421 options->addOption(BooleanOption("norm").store(&bTotNorm_)
422 .description("Normalize by total number of positions with -os"));
423 options->addOption(BooleanOption("cfnorm").store(&bFracNorm_)
424 .description("Normalize by covered fraction with -os"));
425 const char *const cResNumberEnum[] = { "number", "index" };
426 options->addOption(StringOption("resnr").store(&resNumberType_)
427 .enumValue(cResNumberEnum).defaultEnumIndex(0)
428 .description("Residue number output type with -oi and -on"));
429 const char *const cPDBAtomsEnum[] = { "all", "maxsel", "selected" };
430 options->addOption(StringOption("pdbatoms").store(&pdbAtoms_)
431 .enumValue(cPDBAtomsEnum).defaultEnumIndex(0)
432 .description("Atoms to write with -ofpdb"));
433 options->addOption(BooleanOption("cumlt").store(&bCumulativeLifetimes_)
434 .description("Cumulate subintervals of longer intervals in -olt"));
438 Select::optionsFinished(Options * /*options*/,
439 TrajectoryAnalysisSettings *settings)
443 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
444 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
449 Select::initAnalysis(const TrajectoryAnalysisSettings &settings,
450 const TopologyInformation &top)
452 bResInd_ = (resNumberType_ == "index");
454 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
456 i->initCoveredFraction(CFRAC_SOLIDANGLE);
459 // TODO: For large systems, a float may not have enough precision
460 sdata_.setColumnCount(0, sel_.size());
461 totsize_.reserve(sel_.size());
462 for (size_t g = 0; g < sel_.size(); ++g)
464 totsize_.push_back(sel_[g].posCount());
466 if (!fnSize_.empty())
468 AnalysisDataPlotModulePointer plot(
469 new AnalysisDataPlotModule(settings.plotSettings()));
470 plot->setFileName(fnSize_);
471 plot->setTitle("Selection size");
472 plot->setXAxisIsTime();
473 plot->setYLabel("Number");
474 for (size_t g = 0; g < sel_.size(); ++g)
476 plot->appendLegend(sel_[g].name());
478 sdata_.addModule(plot);
481 cdata_.setColumnCount(0, sel_.size());
482 if (!fnFrac_.empty())
484 AnalysisDataPlotModulePointer plot(
485 new AnalysisDataPlotModule(settings.plotSettings()));
486 plot->setFileName(fnFrac_);
487 plot->setTitle("Covered fraction");
488 plot->setXAxisIsTime();
489 plot->setYLabel("Fraction");
490 plot->setYFormat(6, 4);
491 for (size_t g = 0; g < sel_.size(); ++g)
493 plot->appendLegend(sel_[g].name());
495 cdata_.addModule(plot);
498 // TODO: For large systems, a float may not have enough precision
499 if (!fnIndex_.empty())
501 AnalysisDataPlotModulePointer plot(
502 new AnalysisDataPlotModule(settings.plotSettings()));
503 plot->setFileName(fnIndex_);
504 plot->setPlainOutput(true);
505 plot->setYFormat(4, 0);
506 idata_.addModule(plot);
510 boost::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
511 writer->setFileName(fnNdx_);
512 for (size_t g = 0; g < sel_.size(); ++g)
514 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
516 idata_.addModule(writer);
519 mdata_.setDataSetCount(sel_.size());
520 for (size_t g = 0; g < sel_.size(); ++g)
522 mdata_.setColumnCount(g, sel_[g].posCount());
524 lifetimeModule_->setCumulative(bCumulativeLifetimes_);
525 if (!fnMask_.empty())
527 AnalysisDataPlotModulePointer plot(
528 new AnalysisDataPlotModule(settings.plotSettings()));
529 plot->setFileName(fnMask_);
530 plot->setTitle("Selection mask");
531 plot->setXAxisIsTime();
532 plot->setYLabel("Occupancy");
533 plot->setYFormat(1, 0);
534 // TODO: Add legend? (there can be massive amount of columns)
535 mdata_.addModule(plot);
537 if (!fnOccupancy_.empty())
539 AnalysisDataPlotModulePointer plot(
540 new AnalysisDataPlotModule(settings.plotSettings()));
541 plot->setFileName(fnOccupancy_);
542 plot->setTitle("Fraction of time selection matches");
543 plot->setXLabel("Selected position");
544 plot->setYLabel("Occupied fraction");
545 for (size_t g = 0; g < sel_.size(); ++g)
547 plot->appendLegend(sel_[g].name());
549 occupancyModule_->addModule(plot);
551 if (!fnLifetime_.empty())
553 AnalysisDataPlotModulePointer plot(
554 new AnalysisDataPlotModule(settings.plotSettings()));
555 plot->setFileName(fnLifetime_);
556 plot->setTitle("Lifetime histogram");
557 plot->setXAxisIsTime();
558 plot->setYLabel("Number of occurrences");
559 for (size_t g = 0; g < sel_.size(); ++g)
561 plot->appendLegend(sel_[g].name());
563 lifetimeModule_->addModule(plot);
571 Select::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc * /* pbc */,
572 TrajectoryAnalysisModuleData *pdata)
574 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
575 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
576 AnalysisDataHandle idh = pdata->dataHandle(idata_);
577 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
578 const SelectionList &sel = pdata->parallelSelections(sel_);
579 t_topology *top = top_->topology();
581 sdh.startFrame(frnr, fr.time);
582 for (size_t g = 0; g < sel.size(); ++g)
584 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
587 normfac /= totsize_[g];
589 sdh.setPoint(g, sel[g].posCount() * normfac);
593 cdh.startFrame(frnr, fr.time);
594 for (size_t g = 0; g < sel.size(); ++g)
596 cdh.setPoint(g, sel[g].coveredFraction());
600 idh.startFrame(frnr, fr.time);
601 for (size_t g = 0; g < sel.size(); ++g)
603 idh.setPoint(0, sel[g].posCount());
604 idh.finishPointSet();
605 for (int i = 0; i < sel[g].posCount(); ++i)
607 const SelectionPosition &p = sel[g].position(i);
608 if (sel[g].type() == INDEX_RES && !bResInd_)
610 idh.setPoint(1, top->atoms.resinfo[p.mappedId()].nr);
614 idh.setPoint(1, p.mappedId() + 1);
616 idh.finishPointSet();
621 mdh.startFrame(frnr, fr.time);
622 for (size_t g = 0; g < sel.size(); ++g)
624 mdh.selectDataSet(g);
625 for (int i = 0; i < totsize_[g]; ++i)
629 for (int i = 0; i < sel[g].posCount(); ++i)
631 mdh.setPoint(sel[g].position(i).refId(), 1);
639 Select::finishAnalysis(int /*nframes*/)
645 Select::writeOutput()
649 GMX_RELEASE_ASSERT(top_->hasTopology(),
650 "Topology should have been loaded or an error given earlier");
652 atoms = top_->topology()->atoms;
654 snew(pdbinfo, atoms.nr);
655 scoped_ptr_sfree pdbinfoGuard(pdbinfo);
656 if (atoms.pdbinfo != NULL)
658 std::memcpy(pdbinfo, atoms.pdbinfo, atoms.nr*sizeof(*pdbinfo));
660 atoms.pdbinfo = pdbinfo;
661 for (int i = 0; i < atoms.nr; ++i)
663 pdbinfo[i].occup = 0.0;
665 for (size_t g = 0; g < sel_.size(); ++g)
667 for (int i = 0; i < sel_[g].posCount(); ++i)
669 ConstArrayRef<int> atomIndices
670 = sel_[g].position(i).atomIndices();
671 ConstArrayRef<int>::const_iterator ai;
672 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
674 pdbinfo[*ai].occup += occupancyModule_->average(g, i);
680 clear_trxframe(&fr, TRUE);
685 top_->getTopologyConf(&fr.x, fr.box);
687 if (pdbAtoms_ == "all")
689 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
690 write_trxframe(status, &fr, NULL);
693 else if (pdbAtoms_ == "maxsel")
695 std::set<int> atomIndicesSet;
696 for (size_t g = 0; g < sel_.size(); ++g)
698 ConstArrayRef<int> atomIndices = sel_[g].atomIndices();
699 atomIndicesSet.insert(atomIndices.begin(), atomIndices.end());
701 std::vector<int> allAtomIndices(atomIndicesSet.begin(),
702 atomIndicesSet.end());
703 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
704 write_trxframe_indexed(status, &fr, allAtomIndices.size(),
705 &allAtomIndices[0], NULL);
708 else if (pdbAtoms_ == "selected")
710 std::vector<int> indices;
711 for (int i = 0; i < atoms.nr; ++i)
713 if (pdbinfo[i].occup > 0.0)
715 indices.push_back(i);
718 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
719 write_trxframe_indexed(status, &fr, indices.size(), &indices[0], NULL);
724 GMX_RELEASE_ASSERT(false,
725 "Mismatch between -pdbatoms enum values and implementation");
732 const char SelectInfo::name[] = "select";
733 const char SelectInfo::shortDescription[] =
734 "Print general information about selections";
736 TrajectoryAnalysisModulePointer SelectInfo::create()
738 return TrajectoryAnalysisModulePointer(new Select);
741 } // namespace analysismodules