2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020,2021, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements gmx::analysismodules::Rdf.
42 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
43 * \ingroup module_trajectoryanalysis
56 #include "gromacs/analysisdata/analysisdata.h"
57 #include "gromacs/analysisdata/modules/average.h"
58 #include "gromacs/analysisdata/modules/histogram.h"
59 #include "gromacs/analysisdata/modules/plot.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/units.h"
62 #include "gromacs/math/utilities.h"
63 #include "gromacs/math/vec.h"
64 #include "gromacs/options/basicoptions.h"
65 #include "gromacs/options/filenameoption.h"
66 #include "gromacs/options/ioptionscontainer.h"
67 #include "gromacs/pbcutil/pbc.h"
68 #include "gromacs/selection/nbsearch.h"
69 #include "gromacs/selection/selection.h"
70 #include "gromacs/selection/selectionoption.h"
71 #include "gromacs/topology/topology.h"
72 #include "gromacs/trajectory/trajectoryframe.h"
73 #include "gromacs/trajectoryanalysis/analysismodule.h"
74 #include "gromacs/trajectoryanalysis/analysissettings.h"
75 #include "gromacs/trajectoryanalysis/topologyinformation.h"
76 #include "gromacs/utility/exceptions.h"
77 #include "gromacs/utility/stringutil.h"
82 namespace analysismodules
88 //! \addtogroup module_trajectoryanalysis
91 /********************************************************************
92 * Actual analysis module
95 //! Normalization for the computed distribution.
96 enum class Normalization : int
103 //! String values corresponding to Normalization.
104 const EnumerationArray<Normalization, const char*> c_normalizationNames = {
105 { "rdf", "number_density", "none" }
107 //! Whether to compute RDF wrt. surface of the reference group.
108 enum class SurfaceType : int
115 //! String values corresponding to SurfaceType.
116 const EnumerationArray<SurfaceType, const char*> c_surfaceTypeNames = { { "no", "mol", "res" } };
119 * Implements `gmx rdf` trajectory analysis module.
121 class Rdf : public TrajectoryAnalysisModule
126 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
127 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
128 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
129 void initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr) override;
131 TrajectoryAnalysisModuleDataPointer startFrames(const AnalysisDataParallelOptions& opt,
132 const SelectionCollection& selections) override;
133 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
135 void finishAnalysis(int nframes) override;
136 void writeOutput() override;
140 std::string fnCumulative_;
141 SurfaceType surface_;
142 AnalysisDataPlotSettings plotSettings_;
145 * Reference selection to compute RDFs around.
147 * With -surf, Selection::originalIds() and Selection::mappedIds()
148 * store the index of the surface group to which that position belongs.
149 * The RDF is computed by finding the nearest position from each
150 * surface group for each position, and then binning those distances.
154 * Selections to compute RDFs for.
159 * Raw pairwise distance data from which the RDF is computed.
161 * There is a data set for each selection in `sel_`, with a single
162 * column. Each point set will contain a single pairwise distance
163 * that contributes to the RDF.
165 AnalysisData pairDist_;
167 * Normalization factors for each frame.
169 * The first column contains the number of positions in `refSel_` for
170 * that frame (with surface RDF, the number of groups). There are
171 * `sel_.size()` more columns, each containing the number density of
172 * positions for one selection.
174 AnalysisData normFactors_;
176 * Histogram module that computes the actual RDF from `pairDist_`.
178 * The per-frame histograms are raw pair counts in each bin;
179 * the averager is normalized by the average number of reference
180 * positions (average of the first column of `normFactors_`).
182 AnalysisDataSimpleHistogramModulePointer pairCounts_;
184 * Average normalization factors.
186 AnalysisDataAverageModulePointer normAve_;
187 //! Neighborhood search with `refSel_` as the reference positions.
188 AnalysisNeighborhood nb_;
189 //! Topology exclusions used by neighborhood searching.
190 const gmx_localtop_t* localTop_;
192 // User input options.
196 Normalization normalization_;
197 bool bNormalizationSet_;
201 // Pre-computed values for faster access during analysis.
204 int surfaceGroupCount_;
206 // Copy and assign disallowed by base.
210 surface_(SurfaceType::None),
211 pairCounts_(new AnalysisDataSimpleHistogramModule()),
212 normAve_(new AnalysisDataAverageModule()),
217 normalization_(Normalization::Rdf),
218 bNormalizationSet_(false),
223 surfaceGroupCount_(0)
225 pairDist_.setMultipoint(true);
226 pairDist_.addModule(pairCounts_);
227 registerAnalysisDataset(&pairDist_, "pairdist");
228 registerBasicDataset(pairCounts_.get(), "paircount");
230 normFactors_.addModule(normAve_);
231 registerAnalysisDataset(&normFactors_, "norm");
234 void Rdf::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
236 const char* const desc[] = {
237 "[THISMODULE] calculates radial distribution functions from one",
238 "reference set of position (set with [TT]-ref[tt]) to one or more",
239 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
240 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
241 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
242 "based on the value of [TT]-surf[tt], and the closest position in each",
243 "set is used. To compute the RDF around axes parallel to the",
244 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
247 "To set the bin width and maximum distance to use in the RDF, use",
248 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
249 "used to limit the computational cost if the RDF is not of interest",
250 "up to the default (half of the box size with PBC, three times the",
251 "box size without PBC).",
253 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
254 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
255 "A rougher alternative to exclude intra-molecular peaks is to set",
256 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
259 "The RDFs are normalized by 1) average number of positions in",
260 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
261 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
262 "for that selection. To change the normalization, use [TT]-norm[tt]:",
264 "* [TT]rdf[tt]: Use all factors for normalization.",
265 " This produces a normal RDF.",
266 "* [TT]number_density[tt]: Use the first two factors.",
267 " This produces a number density as a function of distance.",
268 "* [TT]none[tt]: Use only the first factor.",
269 " In this case, the RDF is only scaled with the bin width to make",
270 " the integral of the curve represent the number of pairs within a",
273 "Note that exclusions do not affect the normalization: even if",
274 "[TT]-excl[tt] is set, or [TT]-ref[tt] and",
275 "[TT]-sel[tt] contain the same selection, the normalization factor",
276 "is still N*M, not N*(M-excluded).",
278 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
279 "select atoms, i.e., centers of mass are not supported. Further,",
280 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
281 "the volume of a bin is not easily computable.",
283 "Option [TT]-cn[tt] produces the cumulative number RDF,",
284 "i.e. the average number of particles within a distance r."
287 settings->setHelpText(desc);
289 options->addOption(FileNameOption("o")
290 .filetype(OptionFileType::Plot)
294 .defaultBasename("rdf")
295 .description("Computed RDFs"));
296 options->addOption(FileNameOption("cn")
297 .filetype(OptionFileType::Plot)
299 .store(&fnCumulative_)
300 .defaultBasename("rdf_cn")
301 .description("Cumulative RDFs"));
303 options->addOption(DoubleOption("bin").store(&binwidth_).description("Bin width (nm)"));
304 options->addOption(EnumOption<Normalization>("norm")
305 .enumValue(c_normalizationNames)
306 .store(&normalization_)
307 .storeIsSet(&bNormalizationSet_)
308 .description("Normalization"));
309 options->addOption(BooleanOption("xy").store(&bXY_).description(
310 "Use only the x and y components of the distance"));
312 BooleanOption("excl").store(&bExclusions_).description("Use exclusions from topology"));
313 options->addOption(DoubleOption("cut").store(&cutoff_).description(
314 "Shortest distance (nm) to be considered"));
316 DoubleOption("rmax").store(&rmax_).description("Largest distance (nm) to calculate"));
318 options->addOption(EnumOption<SurfaceType>("surf")
319 .enumValue(c_surfaceTypeNames)
321 .description("RDF with respect to the surface of the reference"));
323 options->addOption(SelectionOption("ref").store(&refSel_).required().description(
324 "Reference selection for RDF computation"));
325 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue().description(
326 "Selections to compute RDFs for from the reference"));
329 void Rdf::optionsFinished(TrajectoryAnalysisSettings* settings)
331 if (surface_ != SurfaceType::None)
333 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
335 if (bNormalizationSet_ && normalization_ != Normalization::None)
337 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
339 normalization_ = Normalization::None;
342 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
347 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
355 void Rdf::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
357 pairDist_.setDataSetCount(sel_.size());
358 for (size_t i = 0; i < sel_.size(); ++i)
360 pairDist_.setColumnCount(i, 1);
362 plotSettings_ = settings.plotSettings();
365 normFactors_.setColumnCount(0, sel_.size() + 1);
367 const bool bSurface = (surface_ != SurfaceType::None);
370 if (!refSel_.hasOnlyAtoms())
372 GMX_THROW(InconsistentInputError("-surf only works with -ref that consists of atoms"));
374 const e_index_t type = (surface_ == SurfaceType::Molecule ? INDEX_MOL : INDEX_RES);
375 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.mtop(), type);
380 if (!refSel_.hasOnlyAtoms() || !refSel_.hasSortedAtomIndices())
383 InconsistentInputError("-excl only works with a -ref selection that consist of "
384 "atoms in ascending (sorted) order"));
386 for (size_t i = 0; i < sel_.size(); ++i)
388 if (!sel_[i].hasOnlyAtoms())
390 GMX_THROW(InconsistentInputError(
391 "-excl only works with selections that consist of atoms"));
394 localTop_ = top.expandedTopology();
395 if (localTop_->excls.empty())
397 GMX_THROW(InconsistentInputError(
398 "-excl is set, but the file provided to -s does not define exclusions"));
400 nb_.setTopologyExclusions(&localTop_->excls);
404 void Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr)
406 // If -rmax is not provided, determine one from the box for the first frame.
410 copy_mat(fr.box, box);
411 if (settings.hasPBC())
415 box[ZZ][ZZ] = 2 * std::max(box[XX][XX], box[YY][YY]);
417 rmax_ = std::sqrt(0.99 * 0.99 * max_cutoff2(bXY_ ? PbcType::XY : PbcType::Xyz, box));
425 rmax_ = 3 * std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
428 cut2_ = gmx::square(cutoff_);
429 rmax2_ = gmx::square(rmax_);
430 nb_.setCutoff(rmax_);
431 // We use the double amount of bins, so we can correctly
432 // write the rdf and rdf_cn output at i*binwidth values.
433 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
437 * Temporary memory for use within a single-frame calculation.
439 class RdfModuleData : public TrajectoryAnalysisModuleData
443 * Reserves memory for the frame-local data.
445 * `surfaceGroupCount` will be zero if -surf is not specified.
447 RdfModuleData(TrajectoryAnalysisModule* module,
448 const AnalysisDataParallelOptions& opt,
449 const SelectionCollection& selections,
450 int surfaceGroupCount) :
451 TrajectoryAnalysisModuleData(module, opt, selections)
453 surfaceDist2_.resize(surfaceGroupCount);
456 void finish() override { finishDataHandles(); }
459 * Minimum distance to each surface group.
461 * One entry for each group (residue/molecule, per -surf) in the
462 * reference selection.
463 * This is needed to support neighborhood searching, which may not
464 * return the reference positions in order: for each position, we need
465 * to search through all the reference positions and update this array
466 * to find the minimum distance to each surface group, and then compute
467 * the RDF from these numbers.
469 std::vector<real> surfaceDist2_;
472 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(const AnalysisDataParallelOptions& opt,
473 const SelectionCollection& selections)
475 return TrajectoryAnalysisModuleDataPointer(new RdfModuleData(this, opt, selections, surfaceGroupCount_));
478 void Rdf::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata)
480 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
481 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
482 const Selection& refSel = TrajectoryAnalysisModuleData::parallelSelection(refSel_);
483 const SelectionList& sel = TrajectoryAnalysisModuleData::parallelSelections(sel_);
484 RdfModuleData& frameData = *static_cast<RdfModuleData*>(pdata);
485 const bool bSurface = !frameData.surfaceDist2_.empty();
488 copy_mat(fr.box, boxForVolume);
491 // Set z-size to 1 so we get the surface are iso the volume
492 clear_rvec(boxForVolume[ZZ]);
493 boxForVolume[ZZ][ZZ] = 1;
495 const real inverseVolume = 1.0 / det(boxForVolume);
497 nh.startFrame(frnr, fr.time);
498 // Compute the normalization factor for the number of reference positions.
501 if (refSel.isDynamic())
503 // Count the number of distinct groups.
504 // This assumes that each group is continuous, which is currently
508 for (int i = 0; i < refSel.posCount(); ++i)
510 const int id = refSel.position(i).mappedId();
517 nh.setPoint(0, count);
521 nh.setPoint(0, surfaceGroupCount_);
526 nh.setPoint(0, refSel.posCount());
529 dh.startFrame(frnr, fr.time);
530 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
531 for (size_t g = 0; g < sel.size(); ++g)
537 // Special loop for surface calculation, where a separate neighbor
538 // search is done for each position in the selection, and the
539 // nearest position from each surface group is tracked.
540 std::vector<real>& surfaceDist2 = frameData.surfaceDist2_;
541 for (int i = 0; i < sel[g].posCount(); ++i)
543 std::fill(surfaceDist2.begin(), surfaceDist2.end(), std::numeric_limits<real>::max());
544 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g].position(i));
545 AnalysisNeighborhoodPair pair;
546 while (pairSearch.findNextPair(&pair))
548 const real r2 = pair.distance2();
549 const int refId = refSel.position(pair.refIndex()).mappedId();
550 if (r2 < surfaceDist2[refId])
552 surfaceDist2[refId] = r2;
555 // Accumulate the RDF from the distances to the surface.
556 for (size_t i = 0; i < surfaceDist2.size(); ++i)
558 const real r2 = surfaceDist2[i];
559 // Here, we need to check for rmax, since the value might
560 // be above the cutoff if no points were close to some
561 // surface positions.
562 if (r2 > cut2_ && r2 <= rmax2_)
564 dh.setPoint(0, std::sqrt(r2));
572 // Standard neighborhood search over all pairs within the cutoff
573 // for the -surf no case.
574 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
575 AnalysisNeighborhoodPair pair;
576 while (pairSearch.findNextPair(&pair))
578 const real r2 = pair.distance2();
581 // TODO: Consider whether the histogramming could be done with
582 // less overhead (after first measuring the overhead).
583 dh.setPoint(0, std::sqrt(r2));
588 // Normalization factor for the number density (only used without
589 // -surf, but does not hurt to populate otherwise).
590 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
596 void Rdf::finishAnalysis(int /*nframes*/)
598 // Normalize the averager with the number of reference positions,
599 // from where the normalization propagates to all the output.
600 const real refPosCount = normAve_->average(0, 0);
601 pairCounts_->averager().scaleAll(1.0 / refPosCount);
602 pairCounts_->averager().done();
604 // TODO: Consider how these could be exposed to the testing framework
605 // through the dataset registration mechanism.
606 AverageHistogramPointer finalRdf = pairCounts_->averager().resampleDoubleBinWidth(true);
608 if (normalization_ != Normalization::None)
610 // Normalize by the volume of the bins (volume of sphere segments or
611 // length of circle segments).
612 std::vector<real> invBinVolume;
613 const int nbin = finalRdf->settings().binCount();
614 invBinVolume.resize(nbin);
615 real prevSphereVolume = 0.0;
616 for (int i = 0; i < nbin; ++i)
618 const real r = (i + 0.5) * binwidth_;
619 const real sphereVolume = (bXY_) ? M_PI * r * r : (4.0 / 3.0) * M_PI * r * r * r;
620 const real binVolume = sphereVolume - prevSphereVolume;
621 invBinVolume[i] = 1.0 / binVolume;
622 prevSphereVolume = sphereVolume;
624 finalRdf->scaleAllByVector(invBinVolume.data());
626 if (normalization_ == Normalization::Rdf)
628 // Normalize by particle density.
629 for (size_t g = 0; g < sel_.size(); ++g)
631 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
637 // With no normalization, just scale with bin width to make the
638 // integral of the curve (instead of raw bin sum) represent the pair
640 finalRdf->scaleAll(1.0 / binwidth_);
644 // TODO: Consider if some of this should be done in writeOutput().
646 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
647 plotm->setFileName(fnRdf_);
648 plotm->setTitle("Radial distribution");
649 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
650 plotm->setXLabel("r (nm)");
651 plotm->setYLabel("g(r)");
652 for (size_t i = 0; i < sel_.size(); ++i)
654 plotm->appendLegend(sel_[i].name());
656 finalRdf->addModule(plotm);
659 if (!fnCumulative_.empty())
661 AverageHistogramPointer cumulativeRdf = pairCounts_->averager().resampleDoubleBinWidth(false);
662 cumulativeRdf->makeCumulative();
663 cumulativeRdf->done();
665 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
666 plotm->setFileName(fnCumulative_);
667 plotm->setTitle("Cumulative Number RDF");
668 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
669 plotm->setXLabel("r (nm)");
670 plotm->setYLabel("number");
671 for (size_t i = 0; i < sel_.size(); ++i)
673 plotm->appendLegend(sel_[i].name());
675 cumulativeRdf->addModule(plotm);
679 void Rdf::writeOutput() {}
685 const char RdfInfo::name[] = "rdf";
686 const char RdfInfo::shortDescription[] = "Calculate radial distribution functions";
688 TrajectoryAnalysisModulePointer RdfInfo::create()
690 return TrajectoryAnalysisModulePointer(new Rdf);
693 } // namespace analysismodules