2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements gmx::analysismodules::Rdf.
42 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
43 * \ingroup module_trajectoryanalysis
56 #include "gromacs/analysisdata/analysisdata.h"
57 #include "gromacs/analysisdata/modules/average.h"
58 #include "gromacs/analysisdata/modules/histogram.h"
59 #include "gromacs/analysisdata/modules/plot.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/utilities.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/options/basicoptions.h"
64 #include "gromacs/options/filenameoption.h"
65 #include "gromacs/options/ioptionscontainer.h"
66 #include "gromacs/pbcutil/pbc.h"
67 #include "gromacs/selection/nbsearch.h"
68 #include "gromacs/selection/selection.h"
69 #include "gromacs/selection/selectionoption.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/trajectory/trajectoryframe.h"
72 #include "gromacs/trajectoryanalysis/analysismodule.h"
73 #include "gromacs/trajectoryanalysis/analysissettings.h"
74 #include "gromacs/trajectoryanalysis/topologyinformation.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/stringutil.h"
81 namespace analysismodules
87 //! \addtogroup module_trajectoryanalysis
90 /********************************************************************
91 * Actual analysis module
94 //! Normalization for the computed distribution.
95 enum class Normalization : int
102 //! String values corresponding to Normalization.
103 const EnumerationArray<Normalization, const char*> c_normalizationNames = {
104 { "rdf", "number_density", "none" }
106 //! Whether to compute RDF wrt. surface of the reference group.
107 enum class SurfaceType : int
114 //! String values corresponding to SurfaceType.
115 const EnumerationArray<SurfaceType, const char*> c_surfaceTypeNames = { { "no", "mol", "res" } };
118 * Implements `gmx rdf` trajectory analysis module.
120 class Rdf : public TrajectoryAnalysisModule
125 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
126 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
127 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
128 void initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr) override;
130 TrajectoryAnalysisModuleDataPointer startFrames(const AnalysisDataParallelOptions& opt,
131 const SelectionCollection& selections) override;
132 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
134 void finishAnalysis(int nframes) override;
135 void writeOutput() override;
139 std::string fnCumulative_;
140 SurfaceType surface_;
141 AnalysisDataPlotSettings plotSettings_;
144 * Reference selection to compute RDFs around.
146 * With -surf, Selection::originalIds() and Selection::mappedIds()
147 * store the index of the surface group to which that position belongs.
148 * The RDF is computed by finding the nearest position from each
149 * surface group for each position, and then binning those distances.
153 * Selections to compute RDFs for.
158 * Raw pairwise distance data from which the RDF is computed.
160 * There is a data set for each selection in `sel_`, with a single
161 * column. Each point set will contain a single pairwise distance
162 * that contributes to the RDF.
164 AnalysisData pairDist_;
166 * Normalization factors for each frame.
168 * The first column contains the number of positions in `refSel_` for
169 * that frame (with surface RDF, the number of groups). There are
170 * `sel_.size()` more columns, each containing the number density of
171 * positions for one selection.
173 AnalysisData normFactors_;
175 * Histogram module that computes the actual RDF from `pairDist_`.
177 * The per-frame histograms are raw pair counts in each bin;
178 * the averager is normalized by the average number of reference
179 * positions (average of the first column of `normFactors_`).
181 AnalysisDataSimpleHistogramModulePointer pairCounts_;
183 * Average normalization factors.
185 AnalysisDataAverageModulePointer normAve_;
186 //! Neighborhood search with `refSel_` as the reference positions.
187 AnalysisNeighborhood nb_;
188 //! Topology exclusions used by neighborhood searching.
189 const gmx_localtop_t* localTop_;
191 // User input options.
195 Normalization normalization_;
196 bool bNormalizationSet_;
200 // Pre-computed values for faster access during analysis.
203 int surfaceGroupCount_;
205 // Copy and assign disallowed by base.
209 surface_(SurfaceType::None),
210 pairCounts_(new AnalysisDataSimpleHistogramModule()),
211 normAve_(new AnalysisDataAverageModule()),
216 normalization_(Normalization::Rdf),
217 bNormalizationSet_(false),
222 surfaceGroupCount_(0)
224 pairDist_.setMultipoint(true);
225 pairDist_.addModule(pairCounts_);
226 registerAnalysisDataset(&pairDist_, "pairdist");
227 registerBasicDataset(pairCounts_.get(), "paircount");
229 normFactors_.addModule(normAve_);
230 registerAnalysisDataset(&normFactors_, "norm");
233 void Rdf::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
235 const char* const desc[] = {
236 "[THISMODULE] calculates radial distribution functions from one",
237 "reference set of position (set with [TT]-ref[tt]) to one or more",
238 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
239 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
240 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
241 "based on the value of [TT]-surf[tt], and the closest position in each",
242 "set is used. To compute the RDF around axes parallel to the",
243 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
246 "To set the bin width and maximum distance to use in the RDF, use",
247 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
248 "used to limit the computational cost if the RDF is not of interest",
249 "up to the default (half of the box size with PBC, three times the",
250 "box size without PBC).",
252 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
253 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
254 "A rougher alternative to exclude intra-molecular peaks is to set",
255 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
258 "The RDFs are normalized by 1) average number of positions in",
259 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
260 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
261 "for that selection. To change the normalization, use [TT]-norm[tt]:",
263 "* [TT]rdf[tt]: Use all factors for normalization.",
264 " This produces a normal RDF.",
265 "* [TT]number_density[tt]: Use the first two factors.",
266 " This produces a number density as a function of distance.",
267 "* [TT]none[tt]: Use only the first factor.",
268 " In this case, the RDF is only scaled with the bin width to make",
269 " the integral of the curve represent the number of pairs within a",
272 "Note that exclusions do not affect the normalization: even if",
273 "[TT]-excl[tt] is set, or [TT]-ref[tt] and",
274 "[TT]-sel[tt] contain the same selection, the normalization factor",
275 "is still N*M, not N*(M-excluded).",
277 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
278 "select atoms, i.e., centers of mass are not supported. Further,",
279 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
280 "the volume of a bin is not easily computable.",
282 "Option [TT]-cn[tt] produces the cumulative number RDF,",
283 "i.e. the average number of particles within a distance r."
286 settings->setHelpText(desc);
288 options->addOption(FileNameOption("o")
293 .defaultBasename("rdf")
294 .description("Computed RDFs"));
295 options->addOption(FileNameOption("cn")
298 .store(&fnCumulative_)
299 .defaultBasename("rdf_cn")
300 .description("Cumulative RDFs"));
302 options->addOption(DoubleOption("bin").store(&binwidth_).description("Bin width (nm)"));
303 options->addOption(EnumOption<Normalization>("norm")
304 .enumValue(c_normalizationNames)
305 .store(&normalization_)
306 .storeIsSet(&bNormalizationSet_)
307 .description("Normalization"));
308 options->addOption(BooleanOption("xy").store(&bXY_).description(
309 "Use only the x and y components of the distance"));
311 BooleanOption("excl").store(&bExclusions_).description("Use exclusions from topology"));
312 options->addOption(DoubleOption("cut").store(&cutoff_).description(
313 "Shortest distance (nm) to be considered"));
315 DoubleOption("rmax").store(&rmax_).description("Largest distance (nm) to calculate"));
317 options->addOption(EnumOption<SurfaceType>("surf")
318 .enumValue(c_surfaceTypeNames)
320 .description("RDF with respect to the surface of the reference"));
322 options->addOption(SelectionOption("ref").store(&refSel_).required().description(
323 "Reference selection for RDF computation"));
324 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue().description(
325 "Selections to compute RDFs for from the reference"));
328 void Rdf::optionsFinished(TrajectoryAnalysisSettings* settings)
330 if (surface_ != SurfaceType::None)
332 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
334 if (bNormalizationSet_ && normalization_ != Normalization::None)
336 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
338 normalization_ = Normalization::None;
341 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
346 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
354 void Rdf::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
356 pairDist_.setDataSetCount(sel_.size());
357 for (size_t i = 0; i < sel_.size(); ++i)
359 pairDist_.setColumnCount(i, 1);
361 plotSettings_ = settings.plotSettings();
364 normFactors_.setColumnCount(0, sel_.size() + 1);
366 const bool bSurface = (surface_ != SurfaceType::None);
369 if (!refSel_.hasOnlyAtoms())
371 GMX_THROW(InconsistentInputError("-surf only works with -ref that consists of atoms"));
373 const e_index_t type = (surface_ == SurfaceType::Molecule ? INDEX_MOL : INDEX_RES);
374 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.mtop(), type);
379 if (!refSel_.hasOnlyAtoms() || !refSel_.hasSortedAtomIndices())
382 InconsistentInputError("-excl only works with a -ref selection that consist of "
383 "atoms in ascending (sorted) order"));
385 for (size_t i = 0; i < sel_.size(); ++i)
387 if (!sel_[i].hasOnlyAtoms())
389 GMX_THROW(InconsistentInputError(
390 "-excl only works with selections that consist of atoms"));
393 localTop_ = top.expandedTopology();
394 if (localTop_->excls.empty())
396 GMX_THROW(InconsistentInputError(
397 "-excl is set, but the file provided to -s does not define exclusions"));
399 nb_.setTopologyExclusions(&localTop_->excls);
403 void Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr)
405 // If -rmax is not provided, determine one from the box for the first frame.
409 copy_mat(fr.box, box);
410 if (settings.hasPBC())
414 box[ZZ][ZZ] = 2 * std::max(box[XX][XX], box[YY][YY]);
416 rmax_ = std::sqrt(0.99 * 0.99 * max_cutoff2(bXY_ ? PbcType::XY : PbcType::Xyz, box));
424 rmax_ = 3 * std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
427 cut2_ = gmx::square(cutoff_);
428 rmax2_ = gmx::square(rmax_);
429 nb_.setCutoff(rmax_);
430 // We use the double amount of bins, so we can correctly
431 // write the rdf and rdf_cn output at i*binwidth values.
432 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
436 * Temporary memory for use within a single-frame calculation.
438 class RdfModuleData : public TrajectoryAnalysisModuleData
442 * Reserves memory for the frame-local data.
444 * `surfaceGroupCount` will be zero if -surf is not specified.
446 RdfModuleData(TrajectoryAnalysisModule* module,
447 const AnalysisDataParallelOptions& opt,
448 const SelectionCollection& selections,
449 int surfaceGroupCount) :
450 TrajectoryAnalysisModuleData(module, opt, selections)
452 surfaceDist2_.resize(surfaceGroupCount);
455 void finish() override { finishDataHandles(); }
458 * Minimum distance to each surface group.
460 * One entry for each group (residue/molecule, per -surf) in the
461 * reference selection.
462 * This is needed to support neighborhood searching, which may not
463 * return the reference positions in order: for each position, we need
464 * to search through all the reference positions and update this array
465 * to find the minimum distance to each surface group, and then compute
466 * the RDF from these numbers.
468 std::vector<real> surfaceDist2_;
471 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(const AnalysisDataParallelOptions& opt,
472 const SelectionCollection& selections)
474 return TrajectoryAnalysisModuleDataPointer(new RdfModuleData(this, opt, selections, surfaceGroupCount_));
477 void Rdf::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata)
479 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
480 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
481 const Selection& refSel = pdata->parallelSelection(refSel_);
482 const SelectionList& sel = pdata->parallelSelections(sel_);
483 RdfModuleData& frameData = *static_cast<RdfModuleData*>(pdata);
484 const bool bSurface = !frameData.surfaceDist2_.empty();
487 copy_mat(fr.box, boxForVolume);
490 // Set z-size to 1 so we get the surface are iso the volume
491 clear_rvec(boxForVolume[ZZ]);
492 boxForVolume[ZZ][ZZ] = 1;
494 const real inverseVolume = 1.0 / det(boxForVolume);
496 nh.startFrame(frnr, fr.time);
497 // Compute the normalization factor for the number of reference positions.
500 if (refSel.isDynamic())
502 // Count the number of distinct groups.
503 // This assumes that each group is continuous, which is currently
507 for (int i = 0; i < refSel.posCount(); ++i)
509 const int id = refSel.position(i).mappedId();
516 nh.setPoint(0, count);
520 nh.setPoint(0, surfaceGroupCount_);
525 nh.setPoint(0, refSel.posCount());
528 dh.startFrame(frnr, fr.time);
529 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
530 for (size_t g = 0; g < sel.size(); ++g)
536 // Special loop for surface calculation, where a separate neighbor
537 // search is done for each position in the selection, and the
538 // nearest position from each surface group is tracked.
539 std::vector<real>& surfaceDist2 = frameData.surfaceDist2_;
540 for (int i = 0; i < sel[g].posCount(); ++i)
542 std::fill(surfaceDist2.begin(), surfaceDist2.end(), std::numeric_limits<real>::max());
543 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g].position(i));
544 AnalysisNeighborhoodPair pair;
545 while (pairSearch.findNextPair(&pair))
547 const real r2 = pair.distance2();
548 const int refId = refSel.position(pair.refIndex()).mappedId();
549 if (r2 < surfaceDist2[refId])
551 surfaceDist2[refId] = r2;
554 // Accumulate the RDF from the distances to the surface.
555 for (size_t i = 0; i < surfaceDist2.size(); ++i)
557 const real r2 = surfaceDist2[i];
558 // Here, we need to check for rmax, since the value might
559 // be above the cutoff if no points were close to some
560 // surface positions.
561 if (r2 > cut2_ && r2 <= rmax2_)
563 dh.setPoint(0, std::sqrt(r2));
571 // Standard neighborhood search over all pairs within the cutoff
572 // for the -surf no case.
573 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
574 AnalysisNeighborhoodPair pair;
575 while (pairSearch.findNextPair(&pair))
577 const real r2 = pair.distance2();
580 // TODO: Consider whether the histogramming could be done with
581 // less overhead (after first measuring the overhead).
582 dh.setPoint(0, std::sqrt(r2));
587 // Normalization factor for the number density (only used without
588 // -surf, but does not hurt to populate otherwise).
589 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
595 void Rdf::finishAnalysis(int /*nframes*/)
597 // Normalize the averager with the number of reference positions,
598 // from where the normalization propagates to all the output.
599 const real refPosCount = normAve_->average(0, 0);
600 pairCounts_->averager().scaleAll(1.0 / refPosCount);
601 pairCounts_->averager().done();
603 // TODO: Consider how these could be exposed to the testing framework
604 // through the dataset registration mechanism.
605 AverageHistogramPointer finalRdf = pairCounts_->averager().resampleDoubleBinWidth(true);
607 if (normalization_ != Normalization::None)
609 // Normalize by the volume of the bins (volume of sphere segments or
610 // length of circle segments).
611 std::vector<real> invBinVolume;
612 const int nbin = finalRdf->settings().binCount();
613 invBinVolume.resize(nbin);
614 real prevSphereVolume = 0.0;
615 for (int i = 0; i < nbin; ++i)
617 const real r = (i + 0.5) * binwidth_;
621 sphereVolume = M_PI * r * r;
625 sphereVolume = (4.0 / 3.0) * M_PI * r * r * r;
627 const real binVolume = sphereVolume - prevSphereVolume;
628 invBinVolume[i] = 1.0 / binVolume;
629 prevSphereVolume = sphereVolume;
631 finalRdf->scaleAllByVector(invBinVolume.data());
633 if (normalization_ == Normalization::Rdf)
635 // Normalize by particle density.
636 for (size_t g = 0; g < sel_.size(); ++g)
638 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
644 // With no normalization, just scale with bin width to make the
645 // integral of the curve (instead of raw bin sum) represent the pair
647 finalRdf->scaleAll(1.0 / binwidth_);
651 // TODO: Consider if some of this should be done in writeOutput().
653 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
654 plotm->setFileName(fnRdf_);
655 plotm->setTitle("Radial distribution");
656 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
657 plotm->setXLabel("r (nm)");
658 plotm->setYLabel("g(r)");
659 for (size_t i = 0; i < sel_.size(); ++i)
661 plotm->appendLegend(sel_[i].name());
663 finalRdf->addModule(plotm);
666 if (!fnCumulative_.empty())
668 AverageHistogramPointer cumulativeRdf = pairCounts_->averager().resampleDoubleBinWidth(false);
669 cumulativeRdf->makeCumulative();
670 cumulativeRdf->done();
672 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
673 plotm->setFileName(fnCumulative_);
674 plotm->setTitle("Cumulative Number RDF");
675 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
676 plotm->setXLabel("r (nm)");
677 plotm->setYLabel("number");
678 for (size_t i = 0; i < sel_.size(); ++i)
680 plotm->appendLegend(sel_[i].name());
682 cumulativeRdf->addModule(plotm);
686 void Rdf::writeOutput() {}
692 const char RdfInfo::name[] = "rdf";
693 const char RdfInfo::shortDescription[] = "Calculate radial distribution functions";
695 TrajectoryAnalysisModulePointer RdfInfo::create()
697 return TrajectoryAnalysisModulePointer(new Rdf);
700 } // namespace analysismodules