2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 * Implements gmx::analysismodules::Rdf.
41 * \author Teemu Murtola <teemu.murtola@gmail.com> (C++ conversion)
42 * \ingroup module_trajectoryanalysis
55 #include "gromacs/analysisdata/analysisdata.h"
56 #include "gromacs/analysisdata/modules/average.h"
57 #include "gromacs/analysisdata/modules/histogram.h"
58 #include "gromacs/analysisdata/modules/plot.h"
59 #include "gromacs/fileio/trx.h"
60 #include "gromacs/math/utilities.h"
61 #include "gromacs/math/vec.h"
62 #include "gromacs/options/basicoptions.h"
63 #include "gromacs/options/filenameoption.h"
64 #include "gromacs/options/ioptionscontainer.h"
65 #include "gromacs/options/options.h"
66 #include "gromacs/pbcutil/pbc.h"
67 #include "gromacs/selection/nbsearch.h"
68 #include "gromacs/selection/selection.h"
69 #include "gromacs/selection/selectionoption.h"
70 #include "gromacs/topology/topology.h"
71 #include "gromacs/trajectoryanalysis/analysismodule.h"
72 #include "gromacs/trajectoryanalysis/analysissettings.h"
73 #include "gromacs/utility/exceptions.h"
74 #include "gromacs/utility/stringutil.h"
79 namespace analysismodules
85 //! \addtogroup module_trajectoryanalysis
88 /********************************************************************
89 * Actual analysis module
93 * Implements `gmx rdf` trajectory analysis module.
95 class Rdf : public TrajectoryAnalysisModule
100 virtual void initOptions(IOptionsContainer *options,
101 TrajectoryAnalysisSettings *settings);
102 virtual void optionsFinished(Options *options,
103 TrajectoryAnalysisSettings *settings);
104 virtual void initAnalysis(const TrajectoryAnalysisSettings &settings,
105 const TopologyInformation &top);
106 virtual void initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
107 const t_trxframe &fr);
109 virtual TrajectoryAnalysisModuleDataPointer startFrames(
110 const AnalysisDataParallelOptions &opt,
111 const SelectionCollection &selections);
112 virtual void analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
113 TrajectoryAnalysisModuleData *pdata);
115 virtual void finishAnalysis(int nframes);
116 virtual void writeOutput();
120 std::string fnCumulative_;
121 std::string surface_;
122 AnalysisDataPlotSettings plotSettings_;
125 * Reference selection to compute RDFs around.
127 * With -surf, Selection::originalIds() and Selection::mappedIds()
128 * store the index of the surface group to which that position belongs.
129 * The RDF is computed by finding the nearest position from each
130 * surface group for each position, and then binning those distances.
134 * Selections to compute RDFs for.
139 * Raw pairwise distance data from which the RDF is computed.
141 * There is a data set for each selection in `sel_`, with a single
142 * column. Each point set will contain a single pairwise distance
143 * that contributes to the RDF.
145 AnalysisData pairDist_;
147 * Normalization factors for each frame.
149 * The first column contains the number of positions in `refSel_` for
150 * that frame (with surface RDF, the number of groups). There are
151 * `sel_.size()` more columns, each containing the number density of
152 * positions for one selection.
154 AnalysisData normFactors_;
156 * Histogram module that computes the actual RDF from `pairDist_`.
158 * The per-frame histograms are raw pair counts in each bin;
159 * the averager is normalized by the average number of reference
160 * positions (average of the first column of `normFactors_`).
162 AnalysisDataSimpleHistogramModulePointer pairCounts_;
164 * Average normalization factors.
166 AnalysisDataAverageModulePointer normAve_;
167 //! Neighborhood search with `refSel_` as the reference positions.
168 AnalysisNeighborhood nb_;
170 // User input options.
178 // Pre-computed values for faster access during analysis.
181 int surfaceGroupCount_;
183 // Copy and assign disallowed by base.
187 : TrajectoryAnalysisModule(RdfInfo::name, RdfInfo::shortDescription),
188 pairCounts_(new AnalysisDataSimpleHistogramModule()),
189 normAve_(new AnalysisDataAverageModule()),
190 binwidth_(0.002), cutoff_(0.0), rmax_(0.0),
191 bNormalize_(true), bXY_(false), bExclusions_(false),
192 cut2_(0.0), rmax2_(0.0), surfaceGroupCount_(0)
194 pairDist_.setMultipoint(true);
195 pairDist_.addModule(pairCounts_);
196 registerAnalysisDataset(&pairDist_, "pairdist");
197 registerBasicDataset(pairCounts_.get(), "paircount");
199 normFactors_.addModule(normAve_);
200 registerAnalysisDataset(&normFactors_, "norm");
204 Rdf::initOptions(IOptionsContainer *options, TrajectoryAnalysisSettings *settings)
206 static const char *const desc[] = {
207 "[THISMODULE] calculates radial distribution functions from one",
208 "refernce set of position (set with [TT]-ref[tt]) to one or more",
209 "sets of positions (set with [TT]-sel[tt]). To compute the RDF with",
210 "respect to the closest position in a set in [TT]-ref[tt] instead, use",
211 "[TT]-surf[tt]: if set, then [TT]-ref[tt] is partitioned into sets",
212 "based on the value of [TT]-surf[tt], and the closest position in each",
213 "set is used. To compute the RDF around axes parallel to the",
214 "[IT]z[it]-axis, i.e., only in the [IT]x[it]-[IT]y[it] plane, use",
216 "To set the bin width and maximum distance to use in the RDF, use",
217 "[TT]-bin[tt] and [TT]-rmax[tt], respectively. The latter can be",
218 "used to limit the computational cost if the RDF is not of interest",
219 "up to the default (half of the box size with PBC, three times the",
220 "box size without PBC).[PAR]",
221 "To use exclusions from the topology ([TT]-s[tt]), set [TT]-excl[tt]",
222 "and ensure that both [TT]-ref[tt] and [TT]-sel[tt] only select atoms.",
223 "A rougher alternative to exclude intra-molecular peaks is to set",
224 "[TT]-cut[tt] to a non-zero value to clear the RDF at small",
226 "The RDFs are normalized by 1) average number of positions in",
227 "[TT]-ref[tt] (the number of groups with [TT]-surf[tt]), 2) volume",
228 "of the bin, and 3) average particle density of [TT]-sel[tt] positions",
229 "for that selection. To only use the first factor for normalization,",
230 "set [TT]-nonorm[tt]. In this case, the RDF is only scaled with the",
231 "bin width to make the integral of the curve represent the number of",
232 "pairs within a range. Note that exclusions do not affect the",
233 "normalization: even if [TT]-excl[tt] is set, or [TT]-ref[tt] and",
234 "[TT]-sel[tt] contain the same selection, the normalization factor",
235 "is still N*M, not N*(M-excluded).[PAR]",
236 "For [TT]-surf[tt], the selection provided to [TT]-ref[tt] must",
237 "select atoms, i.e., centers of mass are not supported. Further,",
238 "[TT]-nonorm[tt] is implied, as the bins have irregular shapes and",
239 "the volume of a bin is not easily computable.[PAR]",
240 "Option [TT]-cn[tt] produces the cumulative number RDF,",
241 "i.e. the average number of particles within a distance r.[PAR]"
244 settings->setHelpText(desc);
246 options->addOption(FileNameOption("o").filetype(eftPlot).outputFile().required()
247 .store(&fnRdf_).defaultBasename("rdf")
248 .description("Computed RDFs"));
249 options->addOption(FileNameOption("cn").filetype(eftPlot).outputFile()
250 .store(&fnCumulative_).defaultBasename("rdf_cn")
251 .description("Cumulative RDFs"));
253 options->addOption(DoubleOption("bin").store(&binwidth_)
254 .description("Bin width (nm)"));
255 options->addOption(BooleanOption("norm").store(&bNormalize_)
256 .description("Normalize for bin volume and density"));
257 options->addOption(BooleanOption("xy").store(&bXY_)
258 .description("Use only the x and y components of the distance"));
259 options->addOption(BooleanOption("excl").store(&bExclusions_)
260 .description("Use exclusions from topology"));
261 options->addOption(DoubleOption("cut").store(&cutoff_)
262 .description("Shortest distance (nm) to be considered"));
263 options->addOption(DoubleOption("rmax").store(&rmax_)
264 .description("Largest distance (nm) to calculate"));
266 const char *const cSurfaceEnum[] = { "no", "mol", "res" };
267 options->addOption(StringOption("surf").enumValue(cSurfaceEnum)
268 .defaultEnumIndex(0).store(&surface_)
269 .description("RDF with respect to the surface of the reference"));
271 options->addOption(SelectionOption("ref").store(&refSel_).required()
272 .description("Reference selection for RDF computation"));
273 options->addOption(SelectionOption("sel").storeVector(&sel_)
274 .required().multiValue()
275 .description("Selections to compute RDFs for from the reference"));
279 Rdf::optionsFinished(Options *options, TrajectoryAnalysisSettings *settings)
281 if (surface_ != "no")
283 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
285 if (options->isSet("norm") && bNormalize_)
287 GMX_THROW(InconsistentInputError("-surf cannot be combined with -norm"));
292 GMX_THROW(InconsistentInputError("-surf cannot be combined with -excl"));
297 settings->setFlag(TrajectoryAnalysisSettings::efRequireTop);
306 Rdf::initAnalysis(const TrajectoryAnalysisSettings &settings,
307 const TopologyInformation &top)
309 pairDist_.setDataSetCount(sel_.size());
310 for (size_t i = 0; i < sel_.size(); ++i)
312 pairDist_.setColumnCount(i, 1);
314 plotSettings_ = settings.plotSettings();
317 normFactors_.setColumnCount(0, sel_.size() + 1);
319 const bool bSurface = (surface_ != "no");
322 if (!refSel_.hasOnlyAtoms())
324 GMX_THROW(InconsistentInputError("-surf only works with -refsel that consists of atoms"));
326 const e_index_t type = (surface_ == "mol" ? INDEX_MOL : INDEX_RES);
327 surfaceGroupCount_ = refSel_.initOriginalIdsToGroup(top.topology(), type);
332 if (!refSel_.hasOnlyAtoms())
334 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
336 for (size_t i = 0; i < sel_.size(); ++i)
338 if (!sel_[i].hasOnlyAtoms())
340 GMX_THROW(InconsistentInputError("-excl only works with selections that consist of atoms"));
343 const t_topology *topology = top.topology();
344 if (topology->excls.nr == 0)
346 GMX_THROW(InconsistentInputError("-excl is set, but the file provided to -s does not define exclusions"));
348 nb_.setTopologyExclusions(&topology->excls);
353 Rdf::initAfterFirstFrame(const TrajectoryAnalysisSettings &settings,
354 const t_trxframe &fr)
356 // If -rmax is not provided, determine one from the box for the first frame.
360 copy_mat(fr.box, box);
361 if (settings.hasPBC())
365 box[ZZ][ZZ] = 2*std::max(box[XX][XX], box[YY][YY]);
367 rmax_ = std::sqrt(0.99*0.99*max_cutoff2(bXY_ ? epbcXY : epbcXYZ, box));
375 rmax_ = 3*std::max(box[XX][XX], std::max(box[YY][YY], box[ZZ][ZZ]));
378 cut2_ = sqr(cutoff_);
380 nb_.setCutoff(rmax_);
381 // We use the double amount of bins, so we can correctly
382 // write the rdf and rdf_cn output at i*binwidth values.
383 pairCounts_->init(histogramFromRange(0.0, rmax_).binWidth(binwidth_ / 2.0));
387 * Temporary memory for use within a single-frame calculation.
389 class RdfModuleData : public TrajectoryAnalysisModuleData
393 * Reserves memory for the frame-local data.
395 * `surfaceGroupCount` will be zero if -surf is not specified.
397 RdfModuleData(TrajectoryAnalysisModule *module,
398 const AnalysisDataParallelOptions &opt,
399 const SelectionCollection &selections,
400 int surfaceGroupCount)
401 : TrajectoryAnalysisModuleData(module, opt, selections)
403 surfaceDist2_.resize(surfaceGroupCount);
406 virtual void finish() { finishDataHandles(); }
409 * Minimum distance to each surface group.
411 * One entry for each group (residue/molecule, per -surf) in the
412 * reference selection.
413 * This is needed to support neighborhood searching, which may not
414 * return the reference positions in order: for each position, we need
415 * to search through all the reference positions and update this array
416 * to find the minimum distance to each surface group, and then compute
417 * the RDF from these numbers.
419 std::vector<real> surfaceDist2_;
422 TrajectoryAnalysisModuleDataPointer Rdf::startFrames(
423 const AnalysisDataParallelOptions &opt,
424 const SelectionCollection &selections)
426 return TrajectoryAnalysisModuleDataPointer(
427 new RdfModuleData(this, opt, selections, surfaceGroupCount_));
431 Rdf::analyzeFrame(int frnr, const t_trxframe &fr, t_pbc *pbc,
432 TrajectoryAnalysisModuleData *pdata)
434 AnalysisDataHandle dh = pdata->dataHandle(pairDist_);
435 AnalysisDataHandle nh = pdata->dataHandle(normFactors_);
436 const Selection &refSel = pdata->parallelSelection(refSel_);
437 const SelectionList &sel = pdata->parallelSelections(sel_);
438 RdfModuleData &frameData = *static_cast<RdfModuleData *>(pdata);
439 const bool bSurface = !frameData.surfaceDist2_.empty();
442 copy_mat(fr.box, boxForVolume);
445 // Set z-size to 1 so we get the surface are iso the volume
446 clear_rvec(boxForVolume[ZZ]);
447 boxForVolume[ZZ][ZZ] = 1;
449 const real inverseVolume = 1.0 / det(boxForVolume);
451 nh.startFrame(frnr, fr.time);
452 // Compute the normalization factor for the number of reference positions.
455 if (refSel.isDynamic())
457 // Count the number of distinct groups.
458 // This assumes that each group is continuous, which is currently
462 for (int i = 0; i < refSel.posCount(); ++i)
464 const int id = refSel.position(i).mappedId();
471 nh.setPoint(0, count);
475 nh.setPoint(0, surfaceGroupCount_);
480 nh.setPoint(0, refSel.posCount());
483 dh.startFrame(frnr, fr.time);
484 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
485 for (size_t g = 0; g < sel.size(); ++g)
491 // Special loop for surface calculation, where a separate neighbor
492 // search is done for each position in the selection, and the
493 // nearest position from each surface group is tracked.
494 std::vector<real> &surfaceDist2 = frameData.surfaceDist2_;
495 for (int i = 0; i < sel[g].posCount(); ++i)
497 std::fill(surfaceDist2.begin(), surfaceDist2.end(),
498 std::numeric_limits<real>::max());
499 AnalysisNeighborhoodPairSearch pairSearch =
500 nbsearch.startPairSearch(sel[g].position(i));
501 AnalysisNeighborhoodPair pair;
502 while (pairSearch.findNextPair(&pair))
504 const real r2 = pair.distance2();
505 const int refId = refSel.position(pair.refIndex()).mappedId();
506 if (r2 < surfaceDist2[refId])
508 surfaceDist2[refId] = r2;
511 // Accumulate the RDF from the distances to the surface.
512 for (size_t i = 0; i < surfaceDist2.size(); ++i)
514 const real r2 = surfaceDist2[i];
515 // Here, we need to check for rmax, since the value might
516 // be above the cutoff if no points were close to some
517 // surface positions.
518 if (r2 > cut2_ && r2 <= rmax2_)
520 dh.setPoint(0, std::sqrt(r2));
528 // Standard neighborhood search over all pairs within the cutoff
529 // for the -surf no case.
530 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
531 AnalysisNeighborhoodPair pair;
532 while (pairSearch.findNextPair(&pair))
534 const real r2 = pair.distance2();
537 // TODO: Consider whether the histogramming could be done with
538 // less overhead (after first measuring the overhead).
539 dh.setPoint(0, std::sqrt(r2));
544 // Normalization factor for the number density (only used without
545 // -surf, but does not hurt to populate otherwise).
546 nh.setPoint(g + 1, sel[g].posCount() * inverseVolume);
553 Rdf::finishAnalysis(int /*nframes*/)
555 // Normalize the averager with the number of reference positions,
556 // from where the normalization propagates to all the output.
557 const real refPosCount = normAve_->average(0, 0);
558 pairCounts_->averager().scaleAll(1.0 / refPosCount);
559 pairCounts_->averager().done();
561 // TODO: Consider how these could be exposed to the testing framework
562 // through the dataset registration mechanism.
563 AverageHistogramPointer finalRdf
564 = pairCounts_->averager().resampleDoubleBinWidth(true);
568 // Normalize by the volume of the bins (volume of sphere segments or
569 // length of circle segments).
570 std::vector<real> invBinVolume;
571 const int nbin = finalRdf->settings().binCount();
572 invBinVolume.resize(nbin);
573 real prevSphereVolume = 0.0;
574 for (int i = 0; i < nbin; ++i)
576 const real r = (i + 0.5)*binwidth_;
580 sphereVolume = M_PI*r*r;
584 sphereVolume = (4.0/3.0)*M_PI*r*r*r;
586 const real binVolume = sphereVolume - prevSphereVolume;
587 invBinVolume[i] = 1.0 / binVolume;
588 prevSphereVolume = sphereVolume;
590 finalRdf->scaleAllByVector(&invBinVolume[0]);
592 // Normalize by particle density.
593 for (size_t g = 0; g < sel_.size(); ++g)
595 finalRdf->scaleSingle(g, 1.0 / normAve_->average(0, g + 1));
600 // With -nonorm, just scale with bin width to make the integral of the
601 // curve (instead of raw bin sum) represent the pair count.
602 finalRdf->scaleAll(1.0 / binwidth_);
606 // TODO: Consider if some of this should be done in writeOutput().
608 AnalysisDataPlotModulePointer plotm(
609 new AnalysisDataPlotModule(plotSettings_));
610 plotm->setFileName(fnRdf_);
611 plotm->setTitle("Radial distribution");
612 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
613 plotm->setXLabel("r (nm)");
614 plotm->setYLabel("g(r)");
615 for (size_t i = 0; i < sel_.size(); ++i)
617 plotm->appendLegend(sel_[i].name());
619 finalRdf->addModule(plotm);
622 if (!fnCumulative_.empty())
624 AverageHistogramPointer cumulativeRdf
625 = pairCounts_->averager().resampleDoubleBinWidth(false);
626 cumulativeRdf->makeCumulative();
627 cumulativeRdf->done();
629 AnalysisDataPlotModulePointer plotm(
630 new AnalysisDataPlotModule(plotSettings_));
631 plotm->setFileName(fnCumulative_);
632 plotm->setTitle("Cumulative Number RDF");
633 plotm->setSubtitle(formatString("reference %s", refSel_.name()));
634 plotm->setXLabel("r (nm)");
635 plotm->setYLabel("number");
636 for (size_t i = 0; i < sel_.size(); ++i)
638 plotm->appendLegend(sel_[i].name());
640 cumulativeRdf->addModule(plotm);
653 const char RdfInfo::name[] = "rdf";
654 const char RdfInfo::shortDescription[] =
655 "Calculate radial distribution functions";
657 TrajectoryAnalysisModulePointer RdfInfo::create()
659 return TrajectoryAnalysisModulePointer(new Rdf);
662 } // namespace analysismodules