2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Rdf.
41 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
42 * \ingroup module_trajectoryanalysis
55 #include "gromacs/analysisdata/analysisdata.h"
56 #include "gromacs/analysisdata/modules/average.h"
57 #include "gromacs/analysisdata/modules/histogram.h"
58 #include "gromacs/analysisdata/modules/plot.h"
59 #include "gromacs/fileio/trx.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/ioptionscontainer.h"
65 #include "gromacs/pbcutil/pbc.h"
66 #include "gromacs/selection/nbsearch.h"
67 #include "gromacs/selection/selection.h"
68 #include "gromacs/selection/selectionoption.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/trajectoryanalysis/analysismodule.h"
71 #include "gromacs/trajectoryanalysis/analysissettings.h"
72 #include "gromacs/utility/exceptions.h"
73 #include "gromacs/utility/stringutil.h"
78 namespace analysismodules
84 //! \addtogroup module_trajectoryanalysis
87 /********************************************************************
88 * Actual analysis module
92 * Implements `gmx rdf` trajectory analysis module.
94 class Rdf : public TrajectoryAnalysisModule
99 virtual void initOptions(IOptionsContainer *options,
100 TrajectoryAnalysisSettings *settings);
101 virtual void optionsFinished(TrajectoryAnalysisSettings *settings);
102 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
103 const TopologyInformation &top);
104 virtual void initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
105 const t_trxframe &fr);
107 virtual TrajectoryAnalysisModuleDataPointer startFrames(
108 const AnalysisDataParallelOptions &opt,
109 const SelectionCollection &selections);
110 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
111 TrajectoryAnalysisModuleData *pdata);
113 virtual void finishAnalysis(int nframes);
114 virtual void writeOutput();
118 std::string fnCumulative_;
119 std::string surface_;
120 AnalysisDataPlotSettings plotSettings_;
123 * Reference selection to compute RDFs around.
125 * With -surf, Selection::originalIds() and Selection::mappedIds()
126 * store the index of the surface group to which that position belongs.
127 * The RDF is computed by finding the nearest position from each
128 * surface group for each position, and then binning those distances.
132 * Selections to compute RDFs for.
137 * Raw pairwise distance data from which the RDF is computed.
139 * There is a data set for each selection in `sel_`, with a single
140 * column. Each point set will contain a single pairwise distance
141 * that contributes to the RDF.
143 AnalysisData pairDist_;
145 * Normalization factors for each frame.
147 * The first column contains the number of positions in `refSel_` for
148 * that frame (with surface RDF, the number of groups). There are
149 * `sel_.size()` more columns, each containing the number density of
150 * positions for one selection.
152 AnalysisData normFactors_;
154 * Histogram module that computes the actual RDF from `pairDist_`.
156 * The per-frame histograms are raw pair counts in each bin;
157 * the averager is normalized by the average number of reference
158 * positions (average of the first column of `normFactors_`).
160 AnalysisDataSimpleHistogramModulePointer pairCounts_;
162 * Average normalization factors.
164 AnalysisDataAverageModulePointer normAve_;
165 //! Neighborhood search with `refSel_` as the reference positions.
166 AnalysisNeighborhood nb_;
168 // User input options.
173 bool bNormalizationSet_;
177 // Pre-computed values for faster access during analysis.
180 int surfaceGroupCount_;
182 // Copy and assign disallowed by base.
186 : TrajectoryAnalysisModule(RdfInfo::name, RdfInfo::shortDescription),
187 pairCounts_(new AnalysisDataSimpleHistogramModule()),
188 normAve_(new AnalysisDataAverageModule()),
189 binwidth_(0.002), cutoff_(0.0), rmax_(0.0),
190 bNormalize_(true), bNormalizationSet_(false), bXY_(false),
192 cut2_(0.0), rmax2_(0.0), surfaceGroupCount_(0)
194 pairDist_.setMultipoint(true);
195 pairDist_.addModule(pairCounts_);
196 registerAnalysisDataset(&pairDist_, "pairdist");
197 registerBasicDataset(pairCounts_.get(), "paircount");
199 normFactors_.addModule(normAve_);
200 registerAnalysisDataset(&normFactors_, "norm");
204 Rdf::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
206 static const char *const desc[] = {
207 "[THISMODULE] calculates radial distribution functions from one",
208 "refernce set of position (set with [TT]-ref[tt]) to one or more",
209 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
210 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
211 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
212 "based on the value of [TT]-surf[tt], and the closest position in each",
213 "set is used. To compute the RDF around axes parallel to the",
214 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
216 "To set the bin width and maximum distance to use in the RDF, use",
217 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
218 "used to limit the computational cost if the RDF is not of interest",
219 "up to the default (half of the box size with PBC, three times the",
220 "box size without PBC).[PAR]",
221 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
222 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
223 "A rougher alternative to exclude intra-molecular peaks is to set",
224 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
226 "The RDFs are normalized by 1) average number of positions in",
227 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
228 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
229 "for that selection. To only use the first factor for normalization,",
230 "set [TT]-nonorm[tt]. In this case, the RDF is only scaled with the",
231 "bin width to make the integral of the curve represent the number of",
232 "pairs within a range. Note that exclusions do not affect the",
233 "normalization: even if [TT]-excl[tt] is set, or [TT]-ref[tt] and",
234 "[TT]-sel[tt] contain the same selection, the normalization factor",
235 "is still N*M, not N*(M-excluded).[PAR]",
236 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
237 "select atoms, i.e., centers of mass are not supported. Further,",
238 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
239 "the volume of a bin is not easily computable.[PAR]",
240 "Option [TT]-cn[tt] produces the cumulative number RDF,",
241 "i.e. the average number of particles within a distance r.[PAR]"
244 settings->setHelpText(desc);
246 options->addOption(FileNameOption("o").filetype(eftPlot).outputFile().required()
247 .store(&fnRdf_).defaultBasename("rdf")
248 .description("Computed RDFs"));
249 options->addOption(FileNameOption("cn").filetype(eftPlot).outputFile()
250 .store(&fnCumulative_).defaultBasename("rdf_cn")
251 .description("Cumulative RDFs"));
253 options->addOption(DoubleOption("bin").store(&binwidth_)
254 .description("Bin width (nm)"));
255 options->addOption(BooleanOption("norm").store(&bNormalize_)
256 .storeIsSet(&bNormalizationSet_)
257 .description("Normalize for bin volume and density"));
258 options->addOption(BooleanOption("xy").store(&bXY_)
259 .description("Use only the x and y components of the distance"));
260 options->addOption(BooleanOption("excl").store(&bExclusions_)
261 .description("Use exclusions from topology"));
262 options->addOption(DoubleOption("cut").store(&cutoff_)
263 .description("Shortest distance (nm) to be considered"));
264 options->addOption(DoubleOption("rmax").store(&rmax_)
265 .description("Largest distance (nm) to calculate"));
267 const char *const cSurfaceEnum[] = { "no", "mol", "res" };
268 options->addOption(StringOption("surf").enumValue(cSurfaceEnum)
269 .defaultEnumIndex(0).store(&surface_)
270 .description("RDF with respect to the surface of the reference"));
272 options->addOption(SelectionOption("ref").store(&refSel_).required()
273 .description("Reference selection for RDF computation"));
274 options->addOption(SelectionOption("sel").storeVector(&sel_)
275 .required().multiValue()
276 .description("Selections to compute RDFs for from the reference"));
280 Rdf::optionsFinished(TrajectoryAnalysisSettings *settings)
282 if (surface_ != "no")
284 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
286 if (bNormalizationSet_ && bNormalize_)
288 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
293 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
298 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
307 Rdf::initAnalysis(const TrajectoryAnalysisSettings &settings,
308 const TopologyInformation &top)
310 pairDist_.setDataSetCount(sel_.size());
311 for (size_t i = 0; i < sel_.size(); ++i)
313 pairDist_.setColumnCount(i, 1);
315 plotSettings_ = settings.plotSettings();
318 normFactors_.setColumnCount(0, sel_.size() + 1);
320 const bool bSurface = (surface_ != "no");
323 if (!refSel_.hasOnlyAtoms())
325 GMX_THROW(InconsistentInputError("-surf only works with -refsel that consists of atoms"));
327 const e_index_t type = (surface_ == "mol" ? INDEX_MOL : INDEX_RES);
328 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.topology(), type);
333 if (!refSel_.hasOnlyAtoms())
335 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
337 for (size_t i = 0; i < sel_.size(); ++i)
339 if (!sel_[i].hasOnlyAtoms())
341 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
344 const t_topology *topology = top.topology();
345 if (topology->excls.nr == 0)
347 GMX_THROW(InconsistentInputError("-excl is set, but the file provided to -s does not define exclusions"));
349 nb_.setTopologyExclusions(&topology->excls);
354 Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
355 const t_trxframe &fr)
357 // If -rmax is not provided, determine one from the box for the first frame.
361 copy_mat(fr.box, box);
362 if (settings.hasPBC())
366 box[ZZ][ZZ] = 2*std::max(box[XX][XX], box[YY][YY]);
368 rmax_ = std::sqrt(0.99*0.99*max_cutoff2(bXY_ ? epbcXY : epbcXYZ, box));
376 rmax_ = 3*std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
379 cut2_ = sqr(cutoff_);
381 nb_.setCutoff(rmax_);
382 // We use the double amount of bins, so we can correctly
383 // write the rdf and rdf_cn output at i*binwidth values.
384 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
388 * Temporary memory for use within a single-frame calculation.
390 class RdfModuleData : public TrajectoryAnalysisModuleData
394 * Reserves memory for the frame-local data.
396 * `surfaceGroupCount` will be zero if -surf is not specified.
398 RdfModuleData(TrajectoryAnalysisModule *module,
399 const AnalysisDataParallelOptions &opt,
400 const SelectionCollection &selections,
401 int surfaceGroupCount)
402 : TrajectoryAnalysisModuleData(module, opt, selections)
404 surfaceDist2_.resize(surfaceGroupCount);
407 virtual void finish() { finishDataHandles(); }
410 * Minimum distance to each surface group.
412 * One entry for each group (residue/molecule, per -surf) in the
413 * reference selection.
414 * This is needed to support neighborhood searching, which may not
415 * return the reference positions in order: for each position, we need
416 * to search through all the reference positions and update this array
417 * to find the minimum distance to each surface group, and then compute
418 * the RDF from these numbers.
420 std::vector<real> surfaceDist2_;
423 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(
424 const AnalysisDataParallelOptions &opt,
425 const SelectionCollection &selections)
427 return TrajectoryAnalysisModuleDataPointer(
428 new RdfModuleData(this, opt, selections, surfaceGroupCount_));
432 Rdf::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
433 TrajectoryAnalysisModuleData *pdata)
435 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
436 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
437 const Selection &refSel = pdata->parallelSelection(refSel_);
438 const SelectionList &sel = pdata->parallelSelections(sel_);
439 RdfModuleData &frameData = *static_cast<RdfModuleData *>(pdata);
440 const bool bSurface = !frameData.surfaceDist2_.empty();
443 copy_mat(fr.box, boxForVolume);
446 // Set z-size to 1 so we get the surface are iso the volume
447 clear_rvec(boxForVolume[ZZ]);
448 boxForVolume[ZZ][ZZ] = 1;
450 const real inverseVolume = 1.0 / det(boxForVolume);
452 nh.startFrame(frnr, fr.time);
453 // Compute the normalization factor for the number of reference positions.
456 if (refSel.isDynamic())
458 // Count the number of distinct groups.
459 // This assumes that each group is continuous, which is currently
463 for (int i = 0; i < refSel.posCount(); ++i)
465 const int id = refSel.position(i).mappedId();
472 nh.setPoint(0, count);
476 nh.setPoint(0, surfaceGroupCount_);
481 nh.setPoint(0, refSel.posCount());
484 dh.startFrame(frnr, fr.time);
485 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
486 for (size_t g = 0; g < sel.size(); ++g)
492 // Special loop for surface calculation, where a separate neighbor
493 // search is done for each position in the selection, and the
494 // nearest position from each surface group is tracked.
495 std::vector<real> &surfaceDist2 = frameData.surfaceDist2_;
496 for (int i = 0; i < sel[g].posCount(); ++i)
498 std::fill(surfaceDist2.begin(), surfaceDist2.end(),
499 std::numeric_limits<real>::max());
500 AnalysisNeighborhoodPairSearch pairSearch =
501 nbsearch.startPairSearch(sel[g].position(i));
502 AnalysisNeighborhoodPair pair;
503 while (pairSearch.findNextPair(&pair))
505 const real r2 = pair.distance2();
506 const int refId = refSel.position(pair.refIndex()).mappedId();
507 if (r2 < surfaceDist2[refId])
509 surfaceDist2[refId] = r2;
512 // Accumulate the RDF from the distances to the surface.
513 for (size_t i = 0; i < surfaceDist2.size(); ++i)
515 const real r2 = surfaceDist2[i];
516 // Here, we need to check for rmax, since the value might
517 // be above the cutoff if no points were close to some
518 // surface positions.
519 if (r2 > cut2_ && r2 <= rmax2_)
521 dh.setPoint(0, std::sqrt(r2));
529 // Standard neighborhood search over all pairs within the cutoff
530 // for the -surf no case.
531 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
532 AnalysisNeighborhoodPair pair;
533 while (pairSearch.findNextPair(&pair))
535 const real r2 = pair.distance2();
538 // TODO: Consider whether the histogramming could be done with
539 // less overhead (after first measuring the overhead).
540 dh.setPoint(0, std::sqrt(r2));
545 // Normalization factor for the number density (only used without
546 // -surf, but does not hurt to populate otherwise).
547 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
554 Rdf::finishAnalysis(int /*nframes*/)
556 // Normalize the averager with the number of reference positions,
557 // from where the normalization propagates to all the output.
558 const real refPosCount = normAve_->average(0, 0);
559 pairCounts_->averager().scaleAll(1.0 / refPosCount);
560 pairCounts_->averager().done();
562 // TODO: Consider how these could be exposed to the testing framework
563 // through the dataset registration mechanism.
564 AverageHistogramPointer finalRdf
565 = pairCounts_->averager().resampleDoubleBinWidth(true);
569 // Normalize by the volume of the bins (volume of sphere segments or
570 // length of circle segments).
571 std::vector<real> invBinVolume;
572 const int nbin = finalRdf->settings().binCount();
573 invBinVolume.resize(nbin);
574 real prevSphereVolume = 0.0;
575 for (int i = 0; i < nbin; ++i)
577 const real r = (i + 0.5)*binwidth_;
581 sphereVolume = M_PI*r*r;
585 sphereVolume = (4.0/3.0)*M_PI*r*r*r;
587 const real binVolume = sphereVolume - prevSphereVolume;
588 invBinVolume[i] = 1.0 / binVolume;
589 prevSphereVolume = sphereVolume;
591 finalRdf->scaleAllByVector(&invBinVolume[0]);
593 // Normalize by particle density.
594 for (size_t g = 0; g < sel_.size(); ++g)
596 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
601 // With -nonorm, just scale with bin width to make the integral of the
602 // curve (instead of raw bin sum) represent the pair count.
603 finalRdf->scaleAll(1.0 / binwidth_);
607 // TODO: Consider if some of this should be done in writeOutput().
609 AnalysisDataPlotModulePointer plotm(
610 new AnalysisDataPlotModule(plotSettings_));
611 plotm->setFileName(fnRdf_);
612 plotm->setTitle("Radial distribution");
613 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
614 plotm->setXLabel("r (nm)");
615 plotm->setYLabel("g(r)");
616 for (size_t i = 0; i < sel_.size(); ++i)
618 plotm->appendLegend(sel_[i].name());
620 finalRdf->addModule(plotm);
623 if (!fnCumulative_.empty())
625 AverageHistogramPointer cumulativeRdf
626 = pairCounts_->averager().resampleDoubleBinWidth(false);
627 cumulativeRdf->makeCumulative();
628 cumulativeRdf->done();
630 AnalysisDataPlotModulePointer plotm(
631 new AnalysisDataPlotModule(plotSettings_));
632 plotm->setFileName(fnCumulative_);
633 plotm->setTitle("Cumulative Number RDF");
634 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
635 plotm->setXLabel("r (nm)");
636 plotm->setYLabel("number");
637 for (size_t i = 0; i < sel_.size(); ++i)
639 plotm->appendLegend(sel_[i].name());
641 cumulativeRdf->addModule(plotm);
654 const char RdfInfo::name[] = "rdf";
655 const char RdfInfo::shortDescription[] =
656 "Calculate radial distribution functions";
658 TrajectoryAnalysisModulePointer RdfInfo::create()
660 return TrajectoryAnalysisModulePointer(new Rdf);
663 } // namespace analysismodules