3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
9 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
10 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
11 * Copyright (c) 2001-2009, The GROMACS development team,
12 * check out http://www.gromacs.org for more information.
14 * This program is free software; you can redistribute it and/or
15 * modify it under the terms of the GNU General Public License
16 * as published by the Free Software Foundation; either version 2
17 * of the License, or (at your option) any later version.
19 * If you want to redistribute modifications, please consider that
20 * scientific software is very special. Version control is crucial -
21 * bugs must be traceable. We will be happy to consider code for
22 * inclusion in the official distribution, but derived work must not
23 * be called official GROMACS. Details are found in the README & COPYING
24 * files - if they are missing, get the official version at www.gromacs.org.
26 * To help us fund GROMACS development, we humbly ask that you cite
27 * the papers on the package - you can find them in the top README file.
29 * For more info, check our website at http://www.gromacs.org
33 * Implements gmx::analysismodules::Select.
35 * \author Teemu Murtola <teemu.murtola@cbr.su.se>
36 * \ingroup module_trajectoryanalysis
47 #include "gromacs/legacyheaders/gmxfio.h"
48 #include "gromacs/legacyheaders/smalloc.h"
49 #include "gromacs/legacyheaders/statutil.h"
51 #include "gromacs/analysisdata/analysisdata.h"
52 #include "gromacs/analysisdata/dataframe.h"
53 #include "gromacs/analysisdata/datamodule.h"
54 #include "gromacs/analysisdata/modules/average.h"
55 #include "gromacs/analysisdata/modules/plot.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/options.h"
59 #include "gromacs/selection/selection.h"
60 #include "gromacs/selection/selectionoption.h"
61 #include "gromacs/trajectoryanalysis/analysissettings.h"
62 #include "gromacs/utility/exceptions.h"
63 #include "gromacs/utility/gmxassert.h"
64 #include "gromacs/utility/stringutil.h"
69 namespace analysismodules
76 * Data module for writing index files.
78 * \ingroup module_analysisdata
80 class IndexFileWriterModule : public AnalysisDataModuleInterface
83 IndexFileWriterModule();
84 virtual ~IndexFileWriterModule();
86 //! Sets the file name to write the index file to.
87 void setFileName(const std::string &fnm);
89 * Adds information about a group to be printed.
91 * Must be called for each group present in the input data.
93 void addGroup(const std::string &name, bool bDynamic);
95 virtual int flags() const;
97 virtual void dataStarted(AbstractAnalysisData *data);
98 virtual void frameStarted(const AnalysisDataFrameHeader &header);
99 virtual void pointsAdded(const AnalysisDataPointSetRef &points);
100 virtual void frameFinished(const AnalysisDataFrameHeader &header);
101 virtual void dataFinished();
108 GroupInfo(const std::string &name, bool bDynamic)
109 : name(name), bDynamic(bDynamic)
117 std::vector<GroupInfo> groups_;
124 /********************************************************************
125 * IndexFileWriterModule
128 IndexFileWriterModule::IndexFileWriterModule()
129 : fp_(NULL), currentGroup_(-1), currentSize_(0), bAnyWritten_(false)
134 IndexFileWriterModule::~IndexFileWriterModule()
140 void IndexFileWriterModule::closeFile()
150 void IndexFileWriterModule::setFileName(const std::string &fnm)
156 void IndexFileWriterModule::addGroup(const std::string &name, bool bDynamic)
158 std::string newName(name);
159 std::replace(newName.begin(), newName.end(), ' ', '_');
160 groups_.push_back(GroupInfo(newName, bDynamic));
164 int IndexFileWriterModule::flags() const
166 return efAllowMulticolumn | efAllowMultipoint;
170 void IndexFileWriterModule::dataStarted(AbstractAnalysisData * /*data*/)
174 fp_ = gmx_fio_fopen(fnm_.c_str(), "w");
179 void IndexFileWriterModule::frameStarted(const AnalysisDataFrameHeader & /*header*/)
181 bAnyWritten_ = false;
187 IndexFileWriterModule::pointsAdded(const AnalysisDataPointSetRef &points)
193 bool bFirstFrame = (points.frameIndex() == 0);
194 if (points.firstColumn() == 0)
197 GMX_RELEASE_ASSERT(currentGroup_ < static_cast<int>(groups_.size()),
198 "Too few groups initialized");
199 if (bFirstFrame || groups_[currentGroup_].bDynamic)
201 if (!bFirstFrame || currentGroup_ > 0)
203 std::fprintf(fp_, "\n\n");
205 std::string name = groups_[currentGroup_].name;
206 if (groups_[currentGroup_].bDynamic)
208 name += formatString("_f%d_t%.3f", points.frameIndex(), points.x());
210 std::fprintf(fp_, "[ %s ]", name.c_str());
217 if (bFirstFrame || groups_[currentGroup_].bDynamic)
219 if (currentSize_ % 15 == 0)
221 std::fprintf(fp_, "\n");
223 std::fprintf(fp_, "%4d ", static_cast<int>(points.y(0)));
230 void IndexFileWriterModule::frameFinished(const AnalysisDataFrameHeader & /*header*/)
235 void IndexFileWriterModule::dataFinished()
239 std::fprintf(fp_, "\n");
247 /********************************************************************
251 const char Select::name[] = "select";
252 const char Select::shortDescription[] =
253 "Print general information about selections";
256 : TrajectoryAnalysisModule(name, shortDescription),
258 bDump_(false), bTotNorm_(false), bFracNorm_(false), bResInd_(false),
259 top_(NULL), occupancyModule_(new AnalysisDataAverageModule())
261 registerAnalysisDataset(&sdata_, "size");
262 registerAnalysisDataset(&cdata_, "cfrac");
263 idata_.setColumnCount(2);
264 idata_.setMultipoint(true);
265 registerAnalysisDataset(&idata_, "index");
266 registerAnalysisDataset(&mdata_, "mask");
267 occupancyModule_->setXAxis(1.0, 1.0);
268 registerBasicDataset(occupancyModule_.get(), "occupancy");
278 Select::initOptions(Options *options, TrajectoryAnalysisSettings * /*settings*/)
280 static const char *const desc[] = {
281 "[TT]g_select[tt] writes out basic data about dynamic selections.",
282 "It can be used for some simple analyses, or the output can",
283 "be combined with output from other programs and/or external",
284 "analysis programs to calculate more complex things.",
285 "Any combination of the output options is possible, but note",
286 "that [TT]-om[tt] only operates on the first selection.",
287 "Also note that if you provide no output options, no output is",
289 "With [TT]-os[tt], calculates the number of positions in each",
290 "selection for each frame. With [TT]-norm[tt], the output is",
291 "between 0 and 1 and describes the fraction from the maximum",
292 "number of positions (e.g., for selection 'resname RA and x < 5'",
293 "the maximum number of positions is the number of atoms in",
294 "RA residues). With [TT]-cfnorm[tt], the output is divided",
295 "by the fraction covered by the selection.",
296 "[TT]-norm[tt] and [TT]-cfnorm[tt] can be specified independently",
297 "of one another.[PAR]",
298 "With [TT]-oc[tt], the fraction covered by each selection is",
299 "written out as a function of time.[PAR]",
300 "With [TT]-oi[tt], the selected atoms/residues/molecules are",
301 "written out as a function of time. In the output, the first",
302 "column contains the frame time, the second contains the number",
303 "of positions, followed by the atom/residue/molecule numbers.",
304 "If more than one selection is specified, the size of the second",
305 "group immediately follows the last number of the first group",
306 "and so on. With [TT]-dump[tt], the frame time and the number",
307 "of positions is omitted from the output. In this case, only one",
308 "selection can be given.[PAR]",
309 "With [TT]-on[tt], the selected atoms are written as a index file",
310 "compatible with [TT]make_ndx[tt] and the analyzing tools. Each selection",
311 "is written as a selection group and for dynamic selections a",
312 "group is written for each frame.[PAR]",
313 "For residue numbers, the output of [TT]-oi[tt] can be controlled",
314 "with [TT]-resnr[tt]: [TT]number[tt] (default) prints the residue",
315 "numbers as they appear in the input file, while [TT]index[tt] prints",
316 "unique numbers assigned to the residues in the order they appear",
317 "in the input file, starting with 1. The former is more intuitive,",
318 "but if the input contains multiple residues with the same number,",
319 "the output can be less useful.[PAR]",
320 "With [TT]-om[tt], a mask is printed for the first selection",
321 "as a function of time. Each line in the output corresponds to",
322 "one frame, and contains either 0/1 for each atom/residue/molecule",
323 "possibly selected. 1 stands for the atom/residue/molecule being",
324 "selected for the current frame, 0 for not selected.",
325 "With [TT]-dump[tt], the frame time is omitted from the output.[PAR]",
326 "With [TT]-of[tt], the occupancy fraction of each position (i.e.,",
327 "the fraction of frames where the position is selected) is",
329 "With [TT]-ofpdb[tt], a PDB file is written out where the occupancy",
330 "column is filled with the occupancy fraction of each atom in the",
331 "selection. The coordinates in the PDB file will be those from the",
332 "input topology. [TT]-pdbatoms[tt] can be used to control which atoms",
333 "appear in the output PDB file: with [TT]all[tt] all atoms are",
334 "present, with [TT]maxsel[tt] all atoms possibly selected by the",
335 "selection are present, and with [TT]selected[tt] only atoms that are",
336 "selected at least in one frame are present.[PAR]",
337 "With [TT]-om[tt], [TT]-of[tt] and [TT]-ofpdb[tt], only one selection",
338 "can be provided. [TT]-om[tt] and [TT]-of[tt] only accept dynamic",
342 options->setDescription(concatenateStrings(desc));
344 options->addOption(FileNameOption("os").filetype(eftPlot).outputFile()
345 .store(&fnSize_).defaultBasename("size")
346 .description("Number of positions in each selection"));
347 options->addOption(FileNameOption("oc").filetype(eftPlot).outputFile()
348 .store(&fnFrac_).defaultBasename("cfrac")
349 .description("Covered fraction for each selection"));
350 options->addOption(FileNameOption("oi").filetype(eftGenericData).outputFile()
351 .store(&fnIndex_).defaultBasename("index")
352 .description("Indices selected by each selection"));
353 options->addOption(FileNameOption("on").filetype(eftIndex).outputFile()
354 .store(&fnNdx_).defaultBasename("index")
355 .description("Index file from the selection"));
356 options->addOption(FileNameOption("om").filetype(eftPlot).outputFile()
357 .store(&fnMask_).defaultBasename("mask")
358 .description("Mask for selected positions"));
359 options->addOption(FileNameOption("of").filetype(eftPlot).outputFile()
360 .store(&fnOccupancy_).defaultBasename("occupancy")
361 .description("Occupied fraction for selected positions"));
362 options->addOption(FileNameOption("ofpdb").filetype(eftPDB).outputFile()
363 .store(&fnPDB_).defaultBasename("occupancy")
364 .description("PDB file with occupied fraction for selected positions"));
366 selOpt_ = options->addOption(SelectionOption("select").storeVector(&sel_)
367 .required().multiValue()
368 .description("Selections to analyze"));
370 options->addOption(BooleanOption("dump").store(&bDump_)
371 .description("Do not print the frame time (-om, -oi) or the index size (-oi)"));
372 options->addOption(BooleanOption("norm").store(&bTotNorm_)
373 .description("Normalize by total number of positions with -os"));
374 options->addOption(BooleanOption("cfnorm").store(&bFracNorm_)
375 .description("Normalize by covered fraction with -os"));
376 const char *const cResNumberEnum[] = { "number", "index", NULL };
377 options->addOption(StringOption("resnr").store(&resNumberType_)
378 .enumValue(cResNumberEnum).defaultEnumIndex(0)
379 .description("Residue number output type with -oi and -on"));
380 const char *const cPDBAtomsEnum[] = { "all", "maxsel", "selected", NULL };
381 options->addOption(StringOption("pdbatoms").store(&pdbAtoms_)
382 .enumValue(cPDBAtomsEnum).defaultEnumIndex(0)
383 .description("Atoms to write with -ofpdb"));
387 Select::optionsFinished(Options * /*options*/,
388 TrajectoryAnalysisSettings *settings)
392 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
393 settings->setFlag(TrajectoryAnalysisSettings::efUseTopX);
395 if ((!fnIndex_.empty() && bDump_)
396 || !fnMask_.empty() || !fnOccupancy_.empty() || !fnPDB_.empty())
398 selOpt_->setValueCount(1);
403 Select::initAnalysis(const TrajectoryAnalysisSettings &settings,
404 const TopologyInformation &top)
406 if (!sel_[0].isDynamic() && (!fnMask_.empty() || !fnOccupancy_.empty()))
408 GMX_THROW(InconsistentInputError(
409 "-om or -of are not meaningful with a static selection"));
411 bResInd_ = (resNumberType_ == "index");
413 for (SelectionList::iterator i = sel_.begin(); i != sel_.end(); ++i)
415 i->initCoveredFraction(CFRAC_SOLIDANGLE);
418 // TODO: For large systems, a float may not have enough precision
419 sdata_.setColumnCount(sel_.size());
420 totsize_.reserve(sel_.size());
421 for (size_t g = 0; g < sel_.size(); ++g)
423 totsize_.push_back(sel_[g].posCount());
425 if (!fnSize_.empty())
427 AnalysisDataPlotModulePointer plot(
428 new AnalysisDataPlotModule(settings.plotSettings()));
429 plot->setFileName(fnSize_);
430 plot->setTitle("Selection size");
431 plot->setXAxisIsTime();
432 plot->setYLabel("Number");
433 sdata_.addModule(plot);
436 cdata_.setColumnCount(sel_.size());
437 if (!fnFrac_.empty())
439 AnalysisDataPlotModulePointer plot(
440 new AnalysisDataPlotModule(settings.plotSettings()));
441 plot->setFileName(fnFrac_);
442 plot->setTitle("Covered fraction");
443 plot->setXAxisIsTime();
444 plot->setYLabel("Fraction");
445 plot->setYFormat(6, 4);
446 cdata_.addModule(plot);
449 // TODO: For large systems, a float may not have enough precision
450 if (!fnIndex_.empty())
452 AnalysisDataPlotModulePointer plot(
453 new AnalysisDataPlotModule(settings.plotSettings()));
454 plot->setFileName(fnIndex_);
455 plot->setPlainOutput(true);
456 plot->setYFormat(4, 0);
459 plot->setOmitX(bDump_);
460 idata_.addColumnModule(1, 1, plot);
464 idata_.addModule(plot);
469 boost::shared_ptr<IndexFileWriterModule> writer(new IndexFileWriterModule());
470 writer->setFileName(fnNdx_);
471 for (size_t g = 0; g < sel_.size(); ++g)
473 writer->addGroup(sel_[g].name(), sel_[g].isDynamic());
475 idata_.addModule(writer);
478 mdata_.setColumnCount(sel_[0].posCount());
479 mdata_.addModule(occupancyModule_);
480 if (!fnMask_.empty())
482 AnalysisDataPlotModulePointer plot(
483 new AnalysisDataPlotModule(settings.plotSettings()));
484 plot->setFileName(fnMask_);
485 plot->setPlainOutput(bDump_);
486 plot->setOmitX(bDump_);
487 plot->setTitle("Selection mask");
488 plot->setXAxisIsTime();
489 plot->setYLabel("Occupancy");
490 plot->setYFormat(1, 0);
491 mdata_.addModule(plot);
493 if (!fnOccupancy_.empty())
495 AnalysisDataPlotModulePointer plot(
496 new AnalysisDataPlotModule(settings.plotSettings()));
497 plot->setFileName(fnOccupancy_);
498 plot->setTitle("Fraction of time selection matches");
499 plot->setXLabel("Selected position");
500 plot->setYLabel("Occupied fraction");
501 occupancyModule_->addColumnModule(0, 1, plot);
509 Select::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
510 TrajectoryAnalysisModuleData *pdata)
512 AnalysisDataHandle sdh = pdata->dataHandle(sdata_);
513 AnalysisDataHandle cdh = pdata->dataHandle(cdata_);
514 AnalysisDataHandle idh = pdata->dataHandle(idata_);
515 AnalysisDataHandle mdh = pdata->dataHandle(mdata_);
516 const SelectionList &sel = pdata->parallelSelections(sel_);
517 t_topology *top = top_->topology();
519 sdh.startFrame(frnr, fr.time);
520 for (size_t g = 0; g < sel.size(); ++g)
522 real normfac = bFracNorm_ ? 1.0 / sel[g].coveredFraction() : 1.0;
525 normfac /= totsize_[g];
527 sdh.setPoint(g, sel[g].posCount() * normfac);
531 cdh.startFrame(frnr, fr.time);
532 for (size_t g = 0; g < sel.size(); ++g)
534 cdh.setPoint(g, sel[g].coveredFraction());
538 idh.startFrame(frnr, fr.time);
539 for (size_t g = 0; g < sel.size(); ++g)
541 idh.setPoint(0, sel[g].posCount());
542 idh.finishPointSet();
543 for (int i = 0; i < sel[g].posCount(); ++i)
545 const SelectionPosition &p = sel[g].position(i);
546 if (sel[g].type() == INDEX_RES && !bResInd_)
548 idh.setPoint(1, top->atoms.resinfo[p.mappedId()].nr);
552 idh.setPoint(1, p.mappedId() + 1);
554 idh.finishPointSet();
559 mdh.startFrame(frnr, fr.time);
560 for (int i = 0; i < totsize_[0]; ++i)
564 for (int i = 0; i < sel[0].posCount(); ++i)
566 mdh.setPoint(sel[0].position(i).refId(), 1);
573 Select::finishAnalysis(int /*nframes*/)
579 Select::writeOutput()
583 GMX_RELEASE_ASSERT(top_->hasTopology(),
584 "Topology should have been loaded or an error given earlier");
586 atoms = top_->topology()->atoms;
588 snew(pdbinfo, atoms.nr);
589 scoped_ptr_sfree pdbinfoGuard(pdbinfo);
590 if (atoms.pdbinfo != NULL)
592 std::memcpy(pdbinfo, atoms.pdbinfo, atoms.nr*sizeof(*pdbinfo));
594 atoms.pdbinfo = pdbinfo;
595 for (int i = 0; i < atoms.nr; ++i)
597 pdbinfo[i].occup = 0.0;
599 for (int i = 0; i < sel_[0].posCount(); ++i)
601 ConstArrayRef<int> atomIndices = sel_[0].position(i).atomIndices();
602 ConstArrayRef<int>::const_iterator ai;
603 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
605 pdbinfo[*ai].occup = occupancyModule_->average(i);
610 clear_trxframe(&fr, TRUE);
615 top_->getTopologyConf(&fr.x, fr.box);
617 if (pdbAtoms_ == "all")
619 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
620 write_trxframe(status, &fr, NULL);
623 else if (pdbAtoms_ == "maxsel")
625 ConstArrayRef<int> atomIndices = sel_[0].atomIndices();
626 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
627 write_trxframe_indexed(status, &fr, atomIndices.size(),
628 atomIndices.data(), NULL);
631 else if (pdbAtoms_ == "selected")
633 std::vector<int> indices;
634 for (int i = 0; i < sel_[0].posCount(); ++i)
636 if (occupancyModule_->average(i) > 0)
638 ConstArrayRef<int> atomIndices = sel_[0].position(i).atomIndices();
639 ConstArrayRef<int>::const_iterator ai;
640 for (ai = atomIndices.begin(); ai != atomIndices.end(); ++ai)
642 indices.push_back(*ai);
646 t_trxstatus *status = open_trx(fnPDB_.c_str(), "w");
647 write_trxframe_indexed(status, &fr, indices.size(), &indices[0], NULL);
652 GMX_RELEASE_ASSERT(false,
653 "Mismatch between -pdbatoms enum values and implementation");
658 } // namespace analysismodules