2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
7 * Copyright (c) 2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
40 * Implements gmx::analysismodules::Rdf.
42 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
43 * \ingroup module_trajectoryanalysis
56 #include "gromacs/analysisdata/analysisdata.h"
57 #include "gromacs/analysisdata/modules/average.h"
58 #include "gromacs/analysisdata/modules/histogram.h"
59 #include "gromacs/analysisdata/modules/plot.h"
60 #include "gromacs/math/functions.h"
61 #include "gromacs/math/utilities.h"
62 #include "gromacs/math/vec.h"
63 #include "gromacs/options/basicoptions.h"
64 #include "gromacs/options/filenameoption.h"
65 #include "gromacs/options/ioptionscontainer.h"
66 #include "gromacs/pbcutil/pbc.h"
67 #include "gromacs/selection/nbsearch.h"
68 #include "gromacs/selection/selection.h"
69 #include "gromacs/selection/selectionoption.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/trajectory/trajectoryframe.h"
72 #include "gromacs/trajectoryanalysis/analysismodule.h"
73 #include "gromacs/trajectoryanalysis/analysissettings.h"
74 #include "gromacs/trajectoryanalysis/topologyinformation.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/stringutil.h"
81 namespace analysismodules
87 //! \addtogroup module_trajectoryanalysis
90 /********************************************************************
91 * Actual analysis module
94 //! Normalization for the computed distribution.
98 Normalization_NumberDensity,
101 //! String values corresponding to Normalization.
102 const char* const c_NormalizationEnum[] = { "rdf", "number_density", "none" };
103 //! Whether to compute RDF wrt. surface of the reference group.
107 SurfaceType_Molecule,
110 //! String values corresponding to SurfaceType.
111 const char* const c_SurfaceEnum[] = { "no", "mol", "res" };
114 * Implements `gmx rdf` trajectory analysis module.
116 class Rdf : public TrajectoryAnalysisModule
121 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
122 void optionsFinished(TrajectoryAnalysisSettings* settings) override;
123 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
124 void initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr) override;
126 TrajectoryAnalysisModuleDataPointer startFrames(const AnalysisDataParallelOptions& opt,
127 const SelectionCollection& selections) override;
128 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
130 void finishAnalysis(int nframes) override;
131 void writeOutput() override;
135 std::string fnCumulative_;
136 SurfaceType surface_;
137 AnalysisDataPlotSettings plotSettings_;
140 * Reference selection to compute RDFs around.
142 * With -surf, Selection::originalIds() and Selection::mappedIds()
143 * store the index of the surface group to which that position belongs.
144 * The RDF is computed by finding the nearest position from each
145 * surface group for each position, and then binning those distances.
149 * Selections to compute RDFs for.
154 * Raw pairwise distance data from which the RDF is computed.
156 * There is a data set for each selection in `sel_`, with a single
157 * column. Each point set will contain a single pairwise distance
158 * that contributes to the RDF.
160 AnalysisData pairDist_;
162 * Normalization factors for each frame.
164 * The first column contains the number of positions in `refSel_` for
165 * that frame (with surface RDF, the number of groups). There are
166 * `sel_.size()` more columns, each containing the number density of
167 * positions for one selection.
169 AnalysisData normFactors_;
171 * Histogram module that computes the actual RDF from `pairDist_`.
173 * The per-frame histograms are raw pair counts in each bin;
174 * the averager is normalized by the average number of reference
175 * positions (average of the first column of `normFactors_`).
177 AnalysisDataSimpleHistogramModulePointer pairCounts_;
179 * Average normalization factors.
181 AnalysisDataAverageModulePointer normAve_;
182 //! Neighborhood search with `refSel_` as the reference positions.
183 AnalysisNeighborhood nb_;
184 //! Topology exclusions used by neighborhood searching.
185 const gmx_localtop_t* localTop_;
187 // User input options.
191 Normalization normalization_;
192 bool bNormalizationSet_;
196 // Pre-computed values for faster access during analysis.
199 int surfaceGroupCount_;
201 // Copy and assign disallowed by base.
205 surface_(SurfaceType_None),
206 pairCounts_(new AnalysisDataSimpleHistogramModule()),
207 normAve_(new AnalysisDataAverageModule()),
212 normalization_(Normalization_Rdf),
213 bNormalizationSet_(false),
218 surfaceGroupCount_(0)
220 pairDist_.setMultipoint(true);
221 pairDist_.addModule(pairCounts_);
222 registerAnalysisDataset(&pairDist_, "pairdist");
223 registerBasicDataset(pairCounts_.get(), "paircount");
225 normFactors_.addModule(normAve_);
226 registerAnalysisDataset(&normFactors_, "norm");
229 void Rdf::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
231 const char* const desc[] = {
232 "[THISMODULE] calculates radial distribution functions from one",
233 "reference set of position (set with [TT]-ref[tt]) to one or more",
234 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
235 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
236 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
237 "based on the value of [TT]-surf[tt], and the closest position in each",
238 "set is used. To compute the RDF around axes parallel to the",
239 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
242 "To set the bin width and maximum distance to use in the RDF, use",
243 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
244 "used to limit the computational cost if the RDF is not of interest",
245 "up to the default (half of the box size with PBC, three times the",
246 "box size without PBC).",
248 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
249 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
250 "A rougher alternative to exclude intra-molecular peaks is to set",
251 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
254 "The RDFs are normalized by 1) average number of positions in",
255 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
256 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
257 "for that selection. To change the normalization, use [TT]-norm[tt]:",
259 "* [TT]rdf[tt]: Use all factors for normalization.",
260 " This produces a normal RDF.",
261 "* [TT]number_density[tt]: Use the first two factors.",
262 " This produces a number density as a function of distance.",
263 "* [TT]none[tt]: Use only the first factor.",
264 " In this case, the RDF is only scaled with the bin width to make",
265 " the integral of the curve represent the number of pairs within a",
268 "Note that exclusions do not affect the normalization: even if",
269 "[TT]-excl[tt] is set, or [TT]-ref[tt] and",
270 "[TT]-sel[tt] contain the same selection, the normalization factor",
271 "is still N*M, not N*(M-excluded).",
273 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
274 "select atoms, i.e., centers of mass are not supported. Further,",
275 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
276 "the volume of a bin is not easily computable.",
278 "Option [TT]-cn[tt] produces the cumulative number RDF,",
279 "i.e. the average number of particles within a distance r."
282 settings->setHelpText(desc);
284 options->addOption(FileNameOption("o")
289 .defaultBasename("rdf")
290 .description("Computed RDFs"));
291 options->addOption(FileNameOption("cn")
294 .store(&fnCumulative_)
295 .defaultBasename("rdf_cn")
296 .description("Cumulative RDFs"));
298 options->addOption(DoubleOption("bin").store(&binwidth_).description("Bin width (nm)"));
299 options->addOption(EnumOption<Normalization>("norm")
300 .enumValue(c_NormalizationEnum)
301 .store(&normalization_)
302 .storeIsSet(&bNormalizationSet_)
303 .description("Normalization"));
304 options->addOption(BooleanOption("xy").store(&bXY_).description(
305 "Use only the x and y components of the distance"));
307 BooleanOption("excl").store(&bExclusions_).description("Use exclusions from topology"));
308 options->addOption(DoubleOption("cut").store(&cutoff_).description(
309 "Shortest distance (nm) to be considered"));
311 DoubleOption("rmax").store(&rmax_).description("Largest distance (nm) to calculate"));
313 options->addOption(EnumOption<SurfaceType>("surf")
314 .enumValue(c_SurfaceEnum)
316 .description("RDF with respect to the surface of the reference"));
318 options->addOption(SelectionOption("ref").store(&refSel_).required().description(
319 "Reference selection for RDF computation"));
320 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue().description(
321 "Selections to compute RDFs for from the reference"));
324 void Rdf::optionsFinished(TrajectoryAnalysisSettings* settings)
326 if (surface_ != SurfaceType_None)
328 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
330 if (bNormalizationSet_ && normalization_ != Normalization_None)
332 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
334 normalization_ = Normalization_None;
337 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
342 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
350 void Rdf::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
352 pairDist_.setDataSetCount(sel_.size());
353 for (size_t i = 0; i < sel_.size(); ++i)
355 pairDist_.setColumnCount(i, 1);
357 plotSettings_ = settings.plotSettings();
360 normFactors_.setColumnCount(0, sel_.size() + 1);
362 const bool bSurface = (surface_ != SurfaceType_None);
365 if (!refSel_.hasOnlyAtoms())
367 GMX_THROW(InconsistentInputError("-surf only works with -ref that consists of atoms"));
369 const e_index_t type = (surface_ == SurfaceType_Molecule ? INDEX_MOL : INDEX_RES);
370 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.mtop(), type);
375 if (!refSel_.hasOnlyAtoms() || !refSel_.hasSortedAtomIndices())
378 InconsistentInputError("-excl only works with a -ref selection that consist of "
379 "atoms in ascending (sorted) order"));
381 for (size_t i = 0; i < sel_.size(); ++i)
383 if (!sel_[i].hasOnlyAtoms())
385 GMX_THROW(InconsistentInputError(
386 "-excl only works with selections that consist of atoms"));
389 localTop_ = top.expandedTopology();
390 if (localTop_->excls.empty())
392 GMX_THROW(InconsistentInputError(
393 "-excl is set, but the file provided to -s does not define exclusions"));
395 nb_.setTopologyExclusions(&localTop_->excls);
399 void Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings& settings, const t_trxframe& fr)
401 // If -rmax is not provided, determine one from the box for the first frame.
405 copy_mat(fr.box, box);
406 if (settings.hasPBC())
410 box[ZZ][ZZ] = 2 * std::max(box[XX][XX], box[YY][YY]);
412 rmax_ = std::sqrt(0.99 * 0.99 * max_cutoff2(bXY_ ? epbcXY : epbcXYZ, box));
420 rmax_ = 3 * std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
423 cut2_ = gmx::square(cutoff_);
424 rmax2_ = gmx::square(rmax_);
425 nb_.setCutoff(rmax_);
426 // We use the double amount of bins, so we can correctly
427 // write the rdf and rdf_cn output at i*binwidth values.
428 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
432 * Temporary memory for use within a single-frame calculation.
434 class RdfModuleData : public TrajectoryAnalysisModuleData
438 * Reserves memory for the frame-local data.
440 * `surfaceGroupCount` will be zero if -surf is not specified.
442 RdfModuleData(TrajectoryAnalysisModule* module,
443 const AnalysisDataParallelOptions& opt,
444 const SelectionCollection& selections,
445 int surfaceGroupCount) :
446 TrajectoryAnalysisModuleData(module, opt, selections)
448 surfaceDist2_.resize(surfaceGroupCount);
451 void finish() override { finishDataHandles(); }
454 * Minimum distance to each surface group.
456 * One entry for each group (residue/molecule, per -surf) in the
457 * reference selection.
458 * This is needed to support neighborhood searching, which may not
459 * return the reference positions in order: for each position, we need
460 * to search through all the reference positions and update this array
461 * to find the minimum distance to each surface group, and then compute
462 * the RDF from these numbers.
464 std::vector<real> surfaceDist2_;
467 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(const AnalysisDataParallelOptions& opt,
468 const SelectionCollection& selections)
470 return TrajectoryAnalysisModuleDataPointer(new RdfModuleData(this, opt, selections, surfaceGroupCount_));
473 void Rdf::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata)
475 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
476 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
477 const Selection& refSel = pdata->parallelSelection(refSel_);
478 const SelectionList& sel = pdata->parallelSelections(sel_);
479 RdfModuleData& frameData = *static_cast<RdfModuleData*>(pdata);
480 const bool bSurface = !frameData.surfaceDist2_.empty();
483 copy_mat(fr.box, boxForVolume);
486 // Set z-size to 1 so we get the surface are iso the volume
487 clear_rvec(boxForVolume[ZZ]);
488 boxForVolume[ZZ][ZZ] = 1;
490 const real inverseVolume = 1.0 / det(boxForVolume);
492 nh.startFrame(frnr, fr.time);
493 // Compute the normalization factor for the number of reference positions.
496 if (refSel.isDynamic())
498 // Count the number of distinct groups.
499 // This assumes that each group is continuous, which is currently
503 for (int i = 0; i < refSel.posCount(); ++i)
505 const int id = refSel.position(i).mappedId();
512 nh.setPoint(0, count);
516 nh.setPoint(0, surfaceGroupCount_);
521 nh.setPoint(0, refSel.posCount());
524 dh.startFrame(frnr, fr.time);
525 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
526 for (size_t g = 0; g < sel.size(); ++g)
532 // Special loop for surface calculation, where a separate neighbor
533 // search is done for each position in the selection, and the
534 // nearest position from each surface group is tracked.
535 std::vector<real>& surfaceDist2 = frameData.surfaceDist2_;
536 for (int i = 0; i < sel[g].posCount(); ++i)
538 std::fill(surfaceDist2.begin(), surfaceDist2.end(), std::numeric_limits<real>::max());
539 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g].position(i));
540 AnalysisNeighborhoodPair pair;
541 while (pairSearch.findNextPair(&pair))
543 const real r2 = pair.distance2();
544 const int refId = refSel.position(pair.refIndex()).mappedId();
545 if (r2 < surfaceDist2[refId])
547 surfaceDist2[refId] = r2;
550 // Accumulate the RDF from the distances to the surface.
551 for (size_t i = 0; i < surfaceDist2.size(); ++i)
553 const real r2 = surfaceDist2[i];
554 // Here, we need to check for rmax, since the value might
555 // be above the cutoff if no points were close to some
556 // surface positions.
557 if (r2 > cut2_ && r2 <= rmax2_)
559 dh.setPoint(0, std::sqrt(r2));
567 // Standard neighborhood search over all pairs within the cutoff
568 // for the -surf no case.
569 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
570 AnalysisNeighborhoodPair pair;
571 while (pairSearch.findNextPair(&pair))
573 const real r2 = pair.distance2();
576 // TODO: Consider whether the histogramming could be done with
577 // less overhead (after first measuring the overhead).
578 dh.setPoint(0, std::sqrt(r2));
583 // Normalization factor for the number density (only used without
584 // -surf, but does not hurt to populate otherwise).
585 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
591 void Rdf::finishAnalysis(int /*nframes*/)
593 // Normalize the averager with the number of reference positions,
594 // from where the normalization propagates to all the output.
595 const real refPosCount = normAve_->average(0, 0);
596 pairCounts_->averager().scaleAll(1.0 / refPosCount);
597 pairCounts_->averager().done();
599 // TODO: Consider how these could be exposed to the testing framework
600 // through the dataset registration mechanism.
601 AverageHistogramPointer finalRdf = pairCounts_->averager().resampleDoubleBinWidth(true);
603 if (normalization_ != Normalization_None)
605 // Normalize by the volume of the bins (volume of sphere segments or
606 // length of circle segments).
607 std::vector<real> invBinVolume;
608 const int nbin = finalRdf->settings().binCount();
609 invBinVolume.resize(nbin);
610 real prevSphereVolume = 0.0;
611 for (int i = 0; i < nbin; ++i)
613 const real r = (i + 0.5) * binwidth_;
617 sphereVolume = M_PI * r * r;
621 sphereVolume = (4.0 / 3.0) * M_PI * r * r * r;
623 const real binVolume = sphereVolume - prevSphereVolume;
624 invBinVolume[i] = 1.0 / binVolume;
625 prevSphereVolume = sphereVolume;
627 finalRdf->scaleAllByVector(invBinVolume.data());
629 if (normalization_ == Normalization_Rdf)
631 // Normalize by particle density.
632 for (size_t g = 0; g < sel_.size(); ++g)
634 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
640 // With no normalization, just scale with bin width to make the
641 // integral of the curve (instead of raw bin sum) represent the pair
643 finalRdf->scaleAll(1.0 / binwidth_);
647 // TODO: Consider if some of this should be done in writeOutput().
649 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
650 plotm->setFileName(fnRdf_);
651 plotm->setTitle("Radial distribution");
652 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
653 plotm->setXLabel("r (nm)");
654 plotm->setYLabel("g(r)");
655 for (size_t i = 0; i < sel_.size(); ++i)
657 plotm->appendLegend(sel_[i].name());
659 finalRdf->addModule(plotm);
662 if (!fnCumulative_.empty())
664 AverageHistogramPointer cumulativeRdf = pairCounts_->averager().resampleDoubleBinWidth(false);
665 cumulativeRdf->makeCumulative();
666 cumulativeRdf->done();
668 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(plotSettings_));
669 plotm->setFileName(fnCumulative_);
670 plotm->setTitle("Cumulative Number RDF");
671 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
672 plotm->setXLabel("r (nm)");
673 plotm->setYLabel("number");
674 for (size_t i = 0; i < sel_.size(); ++i)
676 plotm->appendLegend(sel_[i].name());
678 cumulativeRdf->addModule(plotm);
682 void Rdf::writeOutput() {}
688 const char RdfInfo::name[] = "rdf";
689 const char RdfInfo::shortDescription[] = "Calculate radial distribution functions";
691 TrajectoryAnalysisModulePointer RdfInfo::create()
693 return TrajectoryAnalysisModulePointer(new Rdf);
696 } // namespace analysismodules