2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Rdf.
41 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
42 * \ingroup module_trajectoryanalysis
55 #include "gromacs/analysisdata/analysisdata.h"
56 #include "gromacs/analysisdata/modules/average.h"
57 #include "gromacs/analysisdata/modules/histogram.h"
58 #include "gromacs/analysisdata/modules/plot.h"
59 #include "gromacs/math/functions.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/ioptionscontainer.h"
65 #include "gromacs/pbcutil/pbc.h"
66 #include "gromacs/selection/nbsearch.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectory/trajectoryframe.h"
71 #include "gromacs/trajectoryanalysis/analysismodule.h"
72 #include "gromacs/trajectoryanalysis/analysissettings.h"
73 #include "gromacs/trajectoryanalysis/topologyinformation.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/stringutil.h"
80 namespace analysismodules
86 //! \addtogroup module_trajectoryanalysis
89 /********************************************************************
90 * Actual analysis module
93 //! Normalization for the computed distribution.
97 Normalization_NumberDensity,
100 //! String values corresponding to Normalization.
101 const char* const c_NormalizationEnum[] = { "rdf", "number_density", "none" };
102 //! Whether to compute RDF wrt. surface of the reference group.
106 SurfaceType_Molecule,
109 //! String values corresponding to SurfaceType.
110 const char* const c_SurfaceEnum[] = { "no", "mol", "res" };
113 * Implements `gmx rdf` trajectory analysis module.
115 class Rdf : public TrajectoryAnalysisModule
120 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
121 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
122 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
123 void initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr) override;
125 TrajectoryAnalysisModuleDataPointer startFrames(const AnalysisDataParallelOptions& opt,
126 const SelectionCollection& selections) override;
127 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
129 void finishAnalysis(int nframes) override;
130 void writeOutput() override;
134 std::string fnCumulative_;
135 SurfaceType surface_;
136 AnalysisDataPlotSettings plotSettings_;
139 * Reference selection to compute RDFs around.
141 * With -surf, Selection::originalIds() and Selection::mappedIds()
142 * store the index of the surface group to which that position belongs.
143 * The RDF is computed by finding the nearest position from each
144 * surface group for each position, and then binning those distances.
148 * Selections to compute RDFs for.
153 * Raw pairwise distance data from which the RDF is computed.
155 * There is a data set for each selection in `sel_`, with a single
156 * column. Each point set will contain a single pairwise distance
157 * that contributes to the RDF.
159 AnalysisData pairDist_;
161 * Normalization factors for each frame.
163 * The first column contains the number of positions in `refSel_` for
164 * that frame (with surface RDF, the number of groups). There are
165 * `sel_.size()` more columns, each containing the number density of
166 * positions for one selection.
168 AnalysisData normFactors_;
170 * Histogram module that computes the actual RDF from `pairDist_`.
172 * The per-frame histograms are raw pair counts in each bin;
173 * the averager is normalized by the average number of reference
174 * positions (average of the first column of `normFactors_`).
176 AnalysisDataSimpleHistogramModulePointer pairCounts_;
178 * Average normalization factors.
180 AnalysisDataAverageModulePointer normAve_;
181 //! Neighborhood search with `refSel_` as the reference positions.
182 AnalysisNeighborhood nb_;
183 //! Topology exclusions used by neighborhood searching.
184 const gmx_localtop_t* localTop_;
186 // User input options.
190 Normalization normalization_;
191 bool bNormalizationSet_;
195 // Pre-computed values for faster access during analysis.
198 int surfaceGroupCount_;
200 // Copy and assign disallowed by base.
204 surface_(SurfaceType_None),
205 pairCounts_(new AnalysisDataSimpleHistogramModule()),
206 normAve_(new AnalysisDataAverageModule()),
211 normalization_(Normalization_Rdf),
212 bNormalizationSet_(false),
217 surfaceGroupCount_(0)
219 pairDist_.setMultipoint(true);
220 pairDist_.addModule(pairCounts_);
221 registerAnalysisDataset(&pairDist_, "pairdist");
222 registerBasicDataset(pairCounts_.get(), "paircount");
224 normFactors_.addModule(normAve_);
225 registerAnalysisDataset(&normFactors_, "norm");
228 void Rdf::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
230 const char* const desc[] = {
231 "[THISMODULE] calculates radial distribution functions from one",
232 "reference set of position (set with [TT]-ref[tt]) to one or more",
233 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
234 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
235 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
236 "based on the value of [TT]-surf[tt], and the closest position in each",
237 "set is used. To compute the RDF around axes parallel to the",
238 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
241 "To set the bin width and maximum distance to use in the RDF, use",
242 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
243 "used to limit the computational cost if the RDF is not of interest",
244 "up to the default (half of the box size with PBC, three times the",
245 "box size without PBC).",
247 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
248 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
249 "A rougher alternative to exclude intra-molecular peaks is to set",
250 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
253 "The RDFs are normalized by 1) average number of positions in",
254 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
255 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
256 "for that selection. To change the normalization, use [TT]-norm[tt]:",
258 "* [TT]rdf[tt]: Use all factors for normalization.",
259 " This produces a normal RDF.",
260 "* [TT]number_density[tt]: Use the first two factors.",
261 " This produces a number density as a function of distance.",
262 "* [TT]none[tt]: Use only the first factor.",
263 " In this case, the RDF is only scaled with the bin width to make",
264 " the integral of the curve represent the number of pairs within a",
267 "Note that exclusions do not affect the normalization: even if",
268 "[TT]-excl[tt] is set, or [TT]-ref[tt] and",
269 "[TT]-sel[tt] contain the same selection, the normalization factor",
270 "is still N*M, not N*(M-excluded).",
272 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
273 "select atoms, i.e., centers of mass are not supported. Further,",
274 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
275 "the volume of a bin is not easily computable.",
277 "Option [TT]-cn[tt] produces the cumulative number RDF,",
278 "i.e. the average number of particles within a distance r."
281 settings->setHelpText(desc);
283 options->addOption(FileNameOption("o")
288 .defaultBasename("rdf")
289 .description("Computed RDFs"));
290 options->addOption(FileNameOption("cn")
293 .store(&fnCumulative_)
294 .defaultBasename("rdf_cn")
295 .description("Cumulative RDFs"));
297 options->addOption(DoubleOption("bin").store(&binwidth_).description("Bin width (nm)"));
298 options->addOption(EnumOption<Normalization>("norm")
299 .enumValue(c_NormalizationEnum)
300 .store(&normalization_)
301 .storeIsSet(&bNormalizationSet_)
302 .description("Normalization"));
303 options->addOption(BooleanOption("xy").store(&bXY_).description(
304 "Use only the x and y components of the distance"));
306 BooleanOption("excl").store(&bExclusions_).description("Use exclusions from topology"));
307 options->addOption(DoubleOption("cut").store(&cutoff_).description(
308 "Shortest distance (nm) to be considered"));
310 DoubleOption("rmax").store(&rmax_).description("Largest distance (nm) to calculate"));
312 options->addOption(EnumOption<SurfaceType>("surf")
313 .enumValue(c_SurfaceEnum)
315 .description("RDF with respect to the surface of the reference"));
317 options->addOption(SelectionOption("ref").store(&refSel_).required().description(
318 "Reference selection for RDF computation"));
319 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue().description(
320 "Selections to compute RDFs for from the reference"));
323 void Rdf::optionsFinished(TrajectoryAnalysisSettings* settings)
325 if (surface_ != SurfaceType_None)
327 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
329 if (bNormalizationSet_ && normalization_ != Normalization_None)
331 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
333 normalization_ = Normalization_None;
336 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
341 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
349 void Rdf::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
351 pairDist_.setDataSetCount(sel_.size());
352 for (size_t i = 0; i < sel_.size(); ++i)
354 pairDist_.setColumnCount(i, 1);
356 plotSettings_ = settings.plotSettings();
359 normFactors_.setColumnCount(0, sel_.size() + 1);
361 const bool bSurface = (surface_ != SurfaceType_None);
364 if (!refSel_.hasOnlyAtoms())
366 GMX_THROW(InconsistentInputError("-surf only works with -ref that consists of atoms"));
368 const e_index_t type = (surface_ == SurfaceType_Molecule ? INDEX_MOL : INDEX_RES);
369 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.mtop(), type);
374 if (!refSel_.hasOnlyAtoms() || !refSel_.hasSortedAtomIndices())
377 InconsistentInputError("-excl only works with a -ref selection that consist of "
378 "atoms in ascending (sorted) order"));
380 for (size_t i = 0; i < sel_.size(); ++i)
382 if (!sel_[i].hasOnlyAtoms())
384 GMX_THROW(InconsistentInputError(
385 "-excl only works with selections that consist of atoms"));
388 localTop_ = top.expandedTopology();
389 if (localTop_->excls.empty())
391 GMX_THROW(InconsistentInputError(
392 "-excl is set, but the file provided to -s does not define exclusions"));
394 nb_.setTopologyExclusions(&localTop_->excls);
398 void Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr)
400 // If -rmax is not provided, determine one from the box for the first frame.
404 copy_mat(fr.box, box);
405 if (settings.hasPBC())
409 box[ZZ][ZZ] = 2 * std::max(box[XX][XX], box[YY][YY]);
411 rmax_ = std::sqrt(0.99 * 0.99 * max_cutoff2(bXY_ ? epbcXY : epbcXYZ, box));
419 rmax_ = 3 * std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
422 cut2_ = gmx::square(cutoff_);
423 rmax2_ = gmx::square(rmax_);
424 nb_.setCutoff(rmax_);
425 // We use the double amount of bins, so we can correctly
426 // write the rdf and rdf_cn output at i*binwidth values.
427 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
431 * Temporary memory for use within a single-frame calculation.
433 class RdfModuleData : public TrajectoryAnalysisModuleData
437 * Reserves memory for the frame-local data.
439 * `surfaceGroupCount` will be zero if -surf is not specified.
441 RdfModuleData(TrajectoryAnalysisModule* module,
442 const AnalysisDataParallelOptions& opt,
443 const SelectionCollection& selections,
444 int surfaceGroupCount) :
445 TrajectoryAnalysisModuleData(module, opt, selections)
447 surfaceDist2_.resize(surfaceGroupCount);
450 void finish() override { finishDataHandles(); }
453 * Minimum distance to each surface group.
455 * One entry for each group (residue/molecule, per -surf) in the
456 * reference selection.
457 * This is needed to support neighborhood searching, which may not
458 * return the reference positions in order: for each position, we need
459 * to search through all the reference positions and update this array
460 * to find the minimum distance to each surface group, and then compute
461 * the RDF from these numbers.
463 std::vector<real> surfaceDist2_;
466 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(const AnalysisDataParallelOptions& opt,
467 const SelectionCollection& selections)
469 return TrajectoryAnalysisModuleDataPointer(new RdfModuleData(this, opt, selections, surfaceGroupCount_));
472 void Rdf::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata)
474 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
475 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
476 const Selection& refSel = pdata->parallelSelection(refSel_);
477 const SelectionList& sel = pdata->parallelSelections(sel_);
478 RdfModuleData& frameData = *static_cast<RdfModuleData*>(pdata);
479 const bool bSurface = !frameData.surfaceDist2_.empty();
482 copy_mat(fr.box, boxForVolume);
485 // Set z-size to 1 so we get the surface are iso the volume
486 clear_rvec(boxForVolume[ZZ]);
487 boxForVolume[ZZ][ZZ] = 1;
489 const real inverseVolume = 1.0 / det(boxForVolume);
491 nh.startFrame(frnr, fr.time);
492 // Compute the normalization factor for the number of reference positions.
495 if (refSel.isDynamic())
497 // Count the number of distinct groups.
498 // This assumes that each group is continuous, which is currently
502 for (int i = 0; i < refSel.posCount(); ++i)
504 const int id = refSel.position(i).mappedId();
511 nh.setPoint(0, count);
515 nh.setPoint(0, surfaceGroupCount_);
520 nh.setPoint(0, refSel.posCount());
523 dh.startFrame(frnr, fr.time);
524 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
525 for (size_t g = 0; g < sel.size(); ++g)
531 // Special loop for surface calculation, where a separate neighbor
532 // search is done for each position in the selection, and the
533 // nearest position from each surface group is tracked.
534 std::vector<real>& surfaceDist2 = frameData.surfaceDist2_;
535 for (int i = 0; i < sel[g].posCount(); ++i)
537 std::fill(surfaceDist2.begin(), surfaceDist2.end(), std::numeric_limits<real>::max());
538 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g].position(i));
539 AnalysisNeighborhoodPair pair;
540 while (pairSearch.findNextPair(&pair))
542 const real r2 = pair.distance2();
543 const int refId = refSel.position(pair.refIndex()).mappedId();
544 if (r2 < surfaceDist2[refId])
546 surfaceDist2[refId] = r2;
549 // Accumulate the RDF from the distances to the surface.
550 for (size_t i = 0; i < surfaceDist2.size(); ++i)
552 const real r2 = surfaceDist2[i];
553 // Here, we need to check for rmax, since the value might
554 // be above the cutoff if no points were close to some
555 // surface positions.
556 if (r2 > cut2_ && r2 <= rmax2_)
558 dh.setPoint(0, std::sqrt(r2));
566 // Standard neighborhood search over all pairs within the cutoff
567 // for the -surf no case.
568 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
569 AnalysisNeighborhoodPair pair;
570 while (pairSearch.findNextPair(&pair))
572 const real r2 = pair.distance2();
575 // TODO: Consider whether the histogramming could be done with
576 // less overhead (after first measuring the overhead).
577 dh.setPoint(0, std::sqrt(r2));
582 // Normalization factor for the number density (only used without
583 // -surf, but does not hurt to populate otherwise).
584 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
590 void Rdf::finishAnalysis(int /*nframes*/)
592 // Normalize the averager with the number of reference positions,
593 // from where the normalization propagates to all the output.
594 const real refPosCount = normAve_->average(0, 0);
595 pairCounts_->averager().scaleAll(1.0 / refPosCount);
596 pairCounts_->averager().done();
598 // TODO: Consider how these could be exposed to the testing framework
599 // through the dataset registration mechanism.
600 AverageHistogramPointer finalRdf = pairCounts_->averager().resampleDoubleBinWidth(true);
602 if (normalization_ != Normalization_None)
604 // Normalize by the volume of the bins (volume of sphere segments or
605 // length of circle segments).
606 std::vector<real> invBinVolume;
607 const int nbin = finalRdf->settings().binCount();
608 invBinVolume.resize(nbin);
609 real prevSphereVolume = 0.0;
610 for (int i = 0; i < nbin; ++i)
612 const real r = (i + 0.5) * binwidth_;
616 sphereVolume = M_PI * r * r;
620 sphereVolume = (4.0 / 3.0) * M_PI * r * r * r;
622 const real binVolume = sphereVolume - prevSphereVolume;
623 invBinVolume[i] = 1.0 / binVolume;
624 prevSphereVolume = sphereVolume;
626 finalRdf->scaleAllByVector(invBinVolume.data());
628 if (normalization_ == Normalization_Rdf)
630 // Normalize by particle density.
631 for (size_t g = 0; g < sel_.size(); ++g)
633 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
639 // With no normalization, just scale with bin width to make the
640 // integral of the curve (instead of raw bin sum) represent the pair
642 finalRdf->scaleAll(1.0 / binwidth_);
646 // TODO: Consider if some of this should be done in writeOutput().
648 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
649 plotm->setFileName(fnRdf_);
650 plotm->setTitle("Radial distribution");
651 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
652 plotm->setXLabel("r (nm)");
653 plotm->setYLabel("g(r)");
654 for (size_t i = 0; i < sel_.size(); ++i)
656 plotm->appendLegend(sel_[i].name());
658 finalRdf->addModule(plotm);
661 if (!fnCumulative_.empty())
663 AverageHistogramPointer cumulativeRdf = pairCounts_->averager().resampleDoubleBinWidth(false);
664 cumulativeRdf->makeCumulative();
665 cumulativeRdf->done();
667 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
668 plotm->setFileName(fnCumulative_);
669 plotm->setTitle("Cumulative Number RDF");
670 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
671 plotm->setXLabel("r (nm)");
672 plotm->setYLabel("number");
673 for (size_t i = 0; i < sel_.size(); ++i)
675 plotm->appendLegend(sel_[i].name());
677 cumulativeRdf->addModule(plotm);
681 void Rdf::writeOutput() {}
687 const char RdfInfo::name[] = "rdf";
688 const char RdfInfo::shortDescription[] = "Calculate radial distribution functions";
690 TrajectoryAnalysisModulePointer RdfInfo::create()
692 return TrajectoryAnalysisModulePointer(new Rdf);
695 } // namespace analysismodules