2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Rdf.
41 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
42 * \ingroup module_trajectoryanalysis
55 #include "gromacs/analysisdata/analysisdata.h"
56 #include "gromacs/analysisdata/modules/average.h"
57 #include "gromacs/analysisdata/modules/histogram.h"
58 #include "gromacs/analysisdata/modules/plot.h"
59 #include "gromacs/math/functions.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/ioptionscontainer.h"
65 #include "gromacs/pbcutil/pbc.h"
66 #include "gromacs/selection/nbsearch.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectory/trajectoryframe.h"
71 #include "gromacs/trajectoryanalysis/analysismodule.h"
72 #include "gromacs/trajectoryanalysis/analysissettings.h"
73 #include "gromacs/trajectoryanalysis/topologyinformation.h"
74 #include "gromacs/utility/exceptions.h"
75 #include "gromacs/utility/stringutil.h"
80 namespace analysismodules
86 //! \addtogroup module_trajectoryanalysis
89 /********************************************************************
90 * Actual analysis module
93 //! Normalization for the computed distribution.
97 Normalization_NumberDensity,
100 //! String values corresponding to Normalization.
101 const char *const c_NormalizationEnum[] = { "rdf", "number_density", "none" };
102 //! Whether to compute RDF wrt. surface of the reference group.
106 SurfaceType_Molecule,
109 //! String values corresponding to SurfaceType.
110 const char *const c_SurfaceEnum[] = { "no", "mol", "res" };
113 * Implements `gmx rdf` trajectory analysis module.
115 class Rdf : public TrajectoryAnalysisModule
120 virtual void initOptions(IOptionsContainer *options,
121 TrajectoryAnalysisSettings *settings);
122 virtual void optionsFinished(TrajectoryAnalysisSettings *settings);
123 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
124 const TopologyInformation &top);
125 virtual void initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
126 const t_trxframe &fr);
128 virtual TrajectoryAnalysisModuleDataPointer startFrames(
129 const AnalysisDataParallelOptions &opt,
130 const SelectionCollection &selections);
131 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
132 TrajectoryAnalysisModuleData *pdata);
134 virtual void finishAnalysis(int nframes);
135 virtual void writeOutput();
139 std::string fnCumulative_;
140 SurfaceType surface_;
141 AnalysisDataPlotSettings plotSettings_;
144 * Reference selection to compute RDFs around.
146 * With -surf, Selection::originalIds() and Selection::mappedIds()
147 * store the index of the surface group to which that position belongs.
148 * The RDF is computed by finding the nearest position from each
149 * surface group for each position, and then binning those distances.
153 * Selections to compute RDFs for.
158 * Raw pairwise distance data from which the RDF is computed.
160 * There is a data set for each selection in `sel_`, with a single
161 * column. Each point set will contain a single pairwise distance
162 * that contributes to the RDF.
164 AnalysisData pairDist_;
166 * Normalization factors for each frame.
168 * The first column contains the number of positions in `refSel_` for
169 * that frame (with surface RDF, the number of groups). There are
170 * `sel_.size()` more columns, each containing the number density of
171 * positions for one selection.
173 AnalysisData normFactors_;
175 * Histogram module that computes the actual RDF from `pairDist_`.
177 * The per-frame histograms are raw pair counts in each bin;
178 * the averager is normalized by the average number of reference
179 * positions (average of the first column of `normFactors_`).
181 AnalysisDataSimpleHistogramModulePointer pairCounts_;
183 * Average normalization factors.
185 AnalysisDataAverageModulePointer normAve_;
186 //! Neighborhood search with `refSel_` as the reference positions.
187 AnalysisNeighborhood nb_;
189 // User input options.
193 Normalization normalization_;
194 bool bNormalizationSet_;
198 // Pre-computed values for faster access during analysis.
201 int surfaceGroupCount_;
203 // Copy and assign disallowed by base.
207 : surface_(SurfaceType_None),
208 pairCounts_(new AnalysisDataSimpleHistogramModule()),
209 normAve_(new AnalysisDataAverageModule()),
210 binwidth_(0.002), cutoff_(0.0), rmax_(0.0),
211 normalization_(Normalization_Rdf), bNormalizationSet_(false), bXY_(false),
213 cut2_(0.0), rmax2_(0.0), surfaceGroupCount_(0)
215 pairDist_.setMultipoint(true);
216 pairDist_.addModule(pairCounts_);
217 registerAnalysisDataset(&pairDist_, "pairdist");
218 registerBasicDataset(pairCounts_.get(), "paircount");
220 normFactors_.addModule(normAve_);
221 registerAnalysisDataset(&normFactors_, "norm");
225 Rdf::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
227 const char *const desc[] = {
228 "[THISMODULE] calculates radial distribution functions from one",
229 "reference set of position (set with [TT]-ref[tt]) to one or more",
230 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
231 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
232 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
233 "based on the value of [TT]-surf[tt], and the closest position in each",
234 "set is used. To compute the RDF around axes parallel to the",
235 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
238 "To set the bin width and maximum distance to use in the RDF, use",
239 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
240 "used to limit the computational cost if the RDF is not of interest",
241 "up to the default (half of the box size with PBC, three times the",
242 "box size without PBC).",
244 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
245 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
246 "A rougher alternative to exclude intra-molecular peaks is to set",
247 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
250 "The RDFs are normalized by 1) average number of positions in",
251 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
252 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
253 "for that selection. To change the normalization, use [TT]-norm[tt]:",
255 "* [TT]rdf[tt]: Use all factors for normalization.",
256 " This produces a normal RDF.",
257 "* [TT]number_density[tt]: Use the first two factors.",
258 " This produces a number density as a function of distance.",
259 "* [TT]none[tt]: Use only the first factor.",
260 " In this case, the RDF is only scaled with the bin width to make",
261 " the integral of the curve represent the number of pairs within a",
264 "Note that exclusions do not affect the normalization: even if",
265 "[TT]-excl[tt] is set, or [TT]-ref[tt] and",
266 "[TT]-sel[tt] contain the same selection, the normalization factor",
267 "is still N*M, not N*(M-excluded).",
269 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
270 "select atoms, i.e., centers of mass are not supported. Further,",
271 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
272 "the volume of a bin is not easily computable.",
274 "Option [TT]-cn[tt] produces the cumulative number RDF,",
275 "i.e. the average number of particles within a distance r."
278 settings->setHelpText(desc);
280 options->addOption(FileNameOption("o").filetype(eftPlot).outputFile().required()
281 .store(&fnRdf_).defaultBasename("rdf")
282 .description("Computed RDFs"));
283 options->addOption(FileNameOption("cn").filetype(eftPlot).outputFile()
284 .store(&fnCumulative_).defaultBasename("rdf_cn")
285 .description("Cumulative RDFs"));
287 options->addOption(DoubleOption("bin").store(&binwidth_)
288 .description("Bin width (nm)"));
289 options->addOption(EnumOption<Normalization>("norm").enumValue(c_NormalizationEnum)
290 .store(&normalization_)
291 .storeIsSet(&bNormalizationSet_)
292 .description("Normalization"));
293 options->addOption(BooleanOption("xy").store(&bXY_)
294 .description("Use only the x and y components of the distance"));
295 options->addOption(BooleanOption("excl").store(&bExclusions_)
296 .description("Use exclusions from topology"));
297 options->addOption(DoubleOption("cut").store(&cutoff_)
298 .description("Shortest distance (nm) to be considered"));
299 options->addOption(DoubleOption("rmax").store(&rmax_)
300 .description("Largest distance (nm) to calculate"));
302 options->addOption(EnumOption<SurfaceType>("surf").enumValue(c_SurfaceEnum)
304 .description("RDF with respect to the surface of the reference"));
306 options->addOption(SelectionOption("ref").store(&refSel_).required()
307 .description("Reference selection for RDF computation"));
308 options->addOption(SelectionOption("sel").storeVector(&sel_)
309 .required().multiValue()
310 .description("Selections to compute RDFs for from the reference"));
314 Rdf::optionsFinished(TrajectoryAnalysisSettings *settings)
316 if (surface_ != SurfaceType_None)
318 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
320 if (bNormalizationSet_ && normalization_ != Normalization_None)
322 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
324 normalization_ = Normalization_None;
327 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
332 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
341 Rdf::initAnalysis(const TrajectoryAnalysisSettings &settings,
342 const TopologyInformation &top)
344 pairDist_.setDataSetCount(sel_.size());
345 for (size_t i = 0; i < sel_.size(); ++i)
347 pairDist_.setColumnCount(i, 1);
349 plotSettings_ = settings.plotSettings();
352 normFactors_.setColumnCount(0, sel_.size() + 1);
354 const bool bSurface = (surface_ != SurfaceType_None);
357 if (!refSel_.hasOnlyAtoms())
359 GMX_THROW(InconsistentInputError("-surf only works with -ref that consists of atoms"));
361 const e_index_t type = (surface_ == SurfaceType_Molecule ? INDEX_MOL : INDEX_RES);
362 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.mtop(), type);
367 if (!refSel_.hasOnlyAtoms() || !refSel_.hasSortedAtomIndices())
369 GMX_THROW(InconsistentInputError("-excl only works with a -ref selection that consist of atoms in ascending (sorted) order"));
371 for (size_t i = 0; i < sel_.size(); ++i)
373 if (!sel_[i].hasOnlyAtoms())
375 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
378 const t_topology *topology = top.topology();
379 if (topology->excls.nr == 0)
381 GMX_THROW(InconsistentInputError("-excl is set, but the file provided to -s does not define exclusions"));
383 nb_.setTopologyExclusions(&topology->excls);
388 Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
389 const t_trxframe &fr)
391 // If -rmax is not provided, determine one from the box for the first frame.
395 copy_mat(fr.box, box);
396 if (settings.hasPBC())
400 box[ZZ][ZZ] = 2*std::max(box[XX][XX], box[YY][YY]);
402 rmax_ = std::sqrt(0.99*0.99*max_cutoff2(bXY_ ? epbcXY : epbcXYZ, box));
410 rmax_ = 3*std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
413 cut2_ = gmx::square(cutoff_);
414 rmax2_ = gmx::square(rmax_);
415 nb_.setCutoff(rmax_);
416 // We use the double amount of bins, so we can correctly
417 // write the rdf and rdf_cn output at i*binwidth values.
418 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
422 * Temporary memory for use within a single-frame calculation.
424 class RdfModuleData : public TrajectoryAnalysisModuleData
428 * Reserves memory for the frame-local data.
430 * `surfaceGroupCount` will be zero if -surf is not specified.
432 RdfModuleData(TrajectoryAnalysisModule *module,
433 const AnalysisDataParallelOptions &opt,
434 const SelectionCollection &selections,
435 int surfaceGroupCount)
436 : TrajectoryAnalysisModuleData(module, opt, selections)
438 surfaceDist2_.resize(surfaceGroupCount);
441 virtual void finish() { finishDataHandles(); }
444 * Minimum distance to each surface group.
446 * One entry for each group (residue/molecule, per -surf) in the
447 * reference selection.
448 * This is needed to support neighborhood searching, which may not
449 * return the reference positions in order: for each position, we need
450 * to search through all the reference positions and update this array
451 * to find the minimum distance to each surface group, and then compute
452 * the RDF from these numbers.
454 std::vector<real> surfaceDist2_;
457 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(
458 const AnalysisDataParallelOptions &opt,
459 const SelectionCollection &selections)
461 return TrajectoryAnalysisModuleDataPointer(
462 new RdfModuleData(this, opt, selections, surfaceGroupCount_));
466 Rdf::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
467 TrajectoryAnalysisModuleData *pdata)
469 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
470 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
471 const Selection &refSel = pdata->parallelSelection(refSel_);
472 const SelectionList &sel = pdata->parallelSelections(sel_);
473 RdfModuleData &frameData = *static_cast<RdfModuleData *>(pdata);
474 const bool bSurface = !frameData.surfaceDist2_.empty();
477 copy_mat(fr.box, boxForVolume);
480 // Set z-size to 1 so we get the surface are iso the volume
481 clear_rvec(boxForVolume[ZZ]);
482 boxForVolume[ZZ][ZZ] = 1;
484 const real inverseVolume = 1.0 / det(boxForVolume);
486 nh.startFrame(frnr, fr.time);
487 // Compute the normalization factor for the number of reference positions.
490 if (refSel.isDynamic())
492 // Count the number of distinct groups.
493 // This assumes that each group is continuous, which is currently
497 for (int i = 0; i < refSel.posCount(); ++i)
499 const int id = refSel.position(i).mappedId();
506 nh.setPoint(0, count);
510 nh.setPoint(0, surfaceGroupCount_);
515 nh.setPoint(0, refSel.posCount());
518 dh.startFrame(frnr, fr.time);
519 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
520 for (size_t g = 0; g < sel.size(); ++g)
526 // Special loop for surface calculation, where a separate neighbor
527 // search is done for each position in the selection, and the
528 // nearest position from each surface group is tracked.
529 std::vector<real> &surfaceDist2 = frameData.surfaceDist2_;
530 for (int i = 0; i < sel[g].posCount(); ++i)
532 std::fill(surfaceDist2.begin(), surfaceDist2.end(),
533 std::numeric_limits<real>::max());
534 AnalysisNeighborhoodPairSearch pairSearch =
535 nbsearch.startPairSearch(sel[g].position(i));
536 AnalysisNeighborhoodPair pair;
537 while (pairSearch.findNextPair(&pair))
539 const real r2 = pair.distance2();
540 const int refId = refSel.position(pair.refIndex()).mappedId();
541 if (r2 < surfaceDist2[refId])
543 surfaceDist2[refId] = r2;
546 // Accumulate the RDF from the distances to the surface.
547 for (size_t i = 0; i < surfaceDist2.size(); ++i)
549 const real r2 = surfaceDist2[i];
550 // Here, we need to check for rmax, since the value might
551 // be above the cutoff if no points were close to some
552 // surface positions.
553 if (r2 > cut2_ && r2 <= rmax2_)
555 dh.setPoint(0, std::sqrt(r2));
563 // Standard neighborhood search over all pairs within the cutoff
564 // for the -surf no case.
565 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
566 AnalysisNeighborhoodPair pair;
567 while (pairSearch.findNextPair(&pair))
569 const real r2 = pair.distance2();
572 // TODO: Consider whether the histogramming could be done with
573 // less overhead (after first measuring the overhead).
574 dh.setPoint(0, std::sqrt(r2));
579 // Normalization factor for the number density (only used without
580 // -surf, but does not hurt to populate otherwise).
581 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
588 Rdf::finishAnalysis(int /*nframes*/)
590 // Normalize the averager with the number of reference positions,
591 // from where the normalization propagates to all the output.
592 const real refPosCount = normAve_->average(0, 0);
593 pairCounts_->averager().scaleAll(1.0 / refPosCount);
594 pairCounts_->averager().done();
596 // TODO: Consider how these could be exposed to the testing framework
597 // through the dataset registration mechanism.
598 AverageHistogramPointer finalRdf
599 = pairCounts_->averager().resampleDoubleBinWidth(true);
601 if (normalization_ != Normalization_None)
603 // Normalize by the volume of the bins (volume of sphere segments or
604 // length of circle segments).
605 std::vector<real> invBinVolume;
606 const int nbin = finalRdf->settings().binCount();
607 invBinVolume.resize(nbin);
608 real prevSphereVolume = 0.0;
609 for (int i = 0; i < nbin; ++i)
611 const real r = (i + 0.5)*binwidth_;
615 sphereVolume = M_PI*r*r;
619 sphereVolume = (4.0/3.0)*M_PI*r*r*r;
621 const real binVolume = sphereVolume - prevSphereVolume;
622 invBinVolume[i] = 1.0 / binVolume;
623 prevSphereVolume = sphereVolume;
625 finalRdf->scaleAllByVector(invBinVolume.data());
627 if (normalization_ == Normalization_Rdf)
629 // Normalize by particle density.
630 for (size_t g = 0; g < sel_.size(); ++g)
632 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
638 // With no normalization, just scale with bin width to make the
639 // integral of the curve (instead of raw bin sum) represent the pair
641 finalRdf->scaleAll(1.0 / binwidth_);
645 // TODO: Consider if some of this should be done in writeOutput().
647 AnalysisDataPlotModulePointer plotm(
648 new AnalysisDataPlotModule(plotSettings_));
649 plotm->setFileName(fnRdf_);
650 plotm->setTitle("Radial distribution");
651 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
652 plotm->setXLabel("r (nm)");
653 plotm->setYLabel("g(r)");
654 for (size_t i = 0; i < sel_.size(); ++i)
656 plotm->appendLegend(sel_[i].name());
658 finalRdf->addModule(plotm);
661 if (!fnCumulative_.empty())
663 AverageHistogramPointer cumulativeRdf
664 = pairCounts_->averager().resampleDoubleBinWidth(false);
665 cumulativeRdf->makeCumulative();
666 cumulativeRdf->done();
668 AnalysisDataPlotModulePointer plotm(
669 new AnalysisDataPlotModule(plotSettings_));
670 plotm->setFileName(fnCumulative_);
671 plotm->setTitle("Cumulative Number RDF");
672 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
673 plotm->setXLabel("r (nm)");
674 plotm->setYLabel("number");
675 for (size_t i = 0; i < sel_.size(); ++i)
677 plotm->appendLegend(sel_[i].name());
679 cumulativeRdf->addModule(plotm);
692 const char RdfInfo::name[] = "rdf";
693 const char RdfInfo::shortDescription[] =
694 "Calculate radial distribution functions";
696 TrajectoryAnalysisModulePointer RdfInfo::create()
698 return TrajectoryAnalysisModulePointer(new Rdf);
701 } // namespace analysismodules