2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2018,2019,2020,2021, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * Implements gmx::analysismodules::PairDistance.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
53 #include "gromacs/analysisdata/analysisdata.h"
54 #include "gromacs/analysisdata/modules/plot.h"
55 #include "gromacs/options/basicoptions.h"
56 #include "gromacs/options/filenameoption.h"
57 #include "gromacs/options/ioptionscontainer.h"
58 #include "gromacs/selection/nbsearch.h"
59 #include "gromacs/selection/selection.h"
60 #include "gromacs/selection/selectionoption.h"
61 #include "gromacs/trajectory/trajectoryframe.h"
62 #include "gromacs/trajectoryanalysis/analysissettings.h"
63 #include "gromacs/trajectoryanalysis/topologyinformation.h"
64 #include "gromacs/utility/arrayref.h"
65 #include "gromacs/utility/exceptions.h"
66 #include "gromacs/utility/stringutil.h"
71 namespace analysismodules
77 //! \addtogroup module_trajectoryanalysis
80 //! Enum value to store the selected value for `-type`.
81 enum class DistanceType : int
88 //! Enum value to store the selected value for `-refgrouping`/`-selgrouping`.
89 enum class GroupType : int
98 //! Strings corresponding to DistanceType.
99 const EnumerationArray<DistanceType, const char*> c_distanceTypeNames = { { "min", "max" } };
100 //! Strings corresponding to GroupType.
101 const EnumerationArray<GroupType, const char*> c_groupTypeNames = {
102 { "all", "res", "mol", "none" }
106 * Implements `gmx pairdist` trajectory analysis module.
108 class PairDistance : public TrajectoryAnalysisModule
113 void initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings) override;
114 void initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top) override;
116 TrajectoryAnalysisModuleDataPointer startFrames(const AnalysisDataParallelOptions& opt,
117 const SelectionCollection& selections) override;
118 void analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata) override;
120 void finishAnalysis(int nframes) override;
121 void writeOutput() override;
125 * Computed distances as a function of time.
127 * There is one data set for each selection in `sel_`.
128 * Within each data set, there is one column for each distance to be
129 * computed, as explained in the `-h` text.
131 AnalysisData distances_;
134 * Reference selection to compute distances to.
136 * mappedId() identifies the group (of type `refGroupType_`) into which
137 * each position belogs.
141 * Selections to compute distances from.
143 * mappedId() identifies the group (of type `selGroupType_`) into which
144 * each position belogs.
151 DistanceType distanceType_;
152 GroupType refGroupType_;
153 GroupType selGroupType_;
155 //! Number of groups in `refSel_`.
157 //! Maximum number of pairs of groups for one selection.
159 //! Initial squared distance for distance accumulation.
161 //! Cutoff squared for use in the actual calculation.
164 //! Neighborhood search object for the pair search.
165 AnalysisNeighborhood nb_;
167 // Copy and assign disallowed by base.
170 PairDistance::PairDistance() :
172 distanceType_(DistanceType::Min),
173 refGroupType_(GroupType::All),
174 selGroupType_(GroupType::All),
180 registerAnalysisDataset(&distances_, "dist");
184 void PairDistance::initOptions(IOptionsContainer* options, TrajectoryAnalysisSettings* settings)
186 static const char* const desc[] = {
187 "[THISMODULE] calculates pairwise distances between one reference",
188 "selection (given with [TT]-ref[tt]) and one or more other selections",
189 "(given with [TT]-sel[tt]). It can calculate either the minimum",
190 "distance (the default), or the maximum distance (with",
191 "[TT]-type max[tt]). Distances to each selection provided with",
192 "[TT]-sel[tt] are computed independently.[PAR]",
193 "By default, the global minimum/maximum distance is computed.",
194 "To compute more distances (e.g., minimum distances to each residue",
195 "in [TT]-ref[tt]), use [TT]-refgrouping[tt] and/or [TT]-selgrouping[tt]",
196 "to specify how the positions within each selection should be",
198 "Computed distances are written to the file specified with [TT]-o[tt].",
199 "If there are N groups in [TT]-ref[tt] and M groups in the first",
200 "selection in [TT]-sel[tt], then the output contains N*M columns",
201 "for the first selection. The columns contain distances like this:",
202 "r1-s1, r2-s1, ..., r1-s2, r2-s2, ..., where rn is the n'th group",
203 "in [TT]-ref[tt] and sn is the n'th group in the other selection.",
204 "The distances for the second selection comes as separate columns",
205 "after the first selection, and so on. If some selections are",
206 "dynamic, only the selected positions are used in the computation",
207 "but the same number of columns is always written out. If there",
208 "are no positions contributing to some group pair, then the cutoff",
209 "value is written (see below).[PAR]",
210 "[TT]-cutoff[tt] sets a cutoff for the computed distances.",
211 "If the result would contain a distance over the cutoff, the cutoff",
212 "value is written to the output file instead. By default, no cutoff",
213 "is used, but if you are not interested in values beyond a cutoff,",
214 "or if you know that the minimum distance is smaller than a cutoff,",
215 "you should set this option to allow the tool to use grid-based",
216 "searching and be significantly faster.[PAR]",
217 "If you want to compute distances between fixed pairs,",
218 "[gmx-distance] may be a more suitable tool."
221 settings->setHelpText(desc);
223 options->addOption(FileNameOption("o")
224 .filetype(OptionFileType::Plot)
228 .defaultBasename("dist")
229 .description("Distances as function of time"));
232 DoubleOption("cutoff").store(&cutoff_).description("Maximum distance to consider"));
233 options->addOption(EnumOption<DistanceType>("type")
234 .store(&distanceType_)
235 .enumValue(c_distanceTypeNames)
236 .description("Type of distances to calculate"));
238 EnumOption<GroupType>("refgrouping")
239 .store(&refGroupType_)
240 .enumValue(c_groupTypeNames)
241 .description("Grouping of -ref positions to compute the min/max over"));
243 EnumOption<GroupType>("selgrouping")
244 .store(&selGroupType_)
245 .enumValue(c_groupTypeNames)
246 .description("Grouping of -sel positions to compute the min/max over"));
248 options->addOption(SelectionOption("ref").store(&refSel_).required().description(
249 "Reference positions to calculate distances from"));
250 options->addOption(SelectionOption("sel").storeVector(&sel_).required().multiValue().description(
251 "Positions to calculate distances for"));
254 //! Helper function to initialize the grouping for a selection.
255 int initSelectionGroups(Selection* sel, const gmx_mtop_t* top, GroupType type)
257 e_index_t indexType = INDEX_UNKNOWN;
258 // If the selection type is INDEX_UNKNOWN (e.g. a position not associated
259 // with a set of particles), don't overwrite the selection type.
260 if (sel->type() != INDEX_UNKNOWN)
264 case GroupType::All: indexType = INDEX_ALL; break;
265 case GroupType::Residue: indexType = INDEX_RES; break;
266 case GroupType::Molecule: indexType = INDEX_MOL; break;
267 case GroupType::None: indexType = INDEX_ATOM; break;
268 case GroupType::Count: GMX_THROW(InternalError("Invalid GroupType"));
271 return sel->initOriginalIdsToGroup(top, indexType);
275 void PairDistance::initAnalysis(const TrajectoryAnalysisSettings& settings, const TopologyInformation& top)
277 refGroupCount_ = initSelectionGroups(&refSel_, top.mtop(), refGroupType_);
280 distances_.setDataSetCount(sel_.size());
281 for (size_t i = 0; i < sel_.size(); ++i)
283 const int selGroupCount = initSelectionGroups(&sel_[i], top.mtop(), selGroupType_);
284 const int columnCount = refGroupCount_ * selGroupCount;
285 maxGroupCount_ = std::max(maxGroupCount_, columnCount);
286 distances_.setColumnCount(i, columnCount);
289 if (!fnDist_.empty())
291 AnalysisDataPlotModulePointer plotm(new AnalysisDataPlotModule(settings.plotSettings()));
292 plotm->setFileName(fnDist_);
293 if (distanceType_ == DistanceType::Max)
295 plotm->setTitle("Maximum distance");
299 plotm->setTitle("Minimum distance");
301 // TODO: Figure out and add a descriptive subtitle and/or a longer
302 // title and/or better legends based on the grouping and the reference
304 plotm->setXAxisIsTime();
305 plotm->setYLabel("Distance (nm)");
306 for (size_t g = 0; g < sel_.size(); ++g)
308 plotm->appendLegend(sel_[g].name());
310 distances_.addModule(plotm);
313 nb_.setCutoff(cutoff_);
317 initialDist2_ = std::numeric_limits<real>::max();
321 initialDist2_ = cutoff_ * cutoff_;
323 if (distanceType_ == DistanceType::Max)
327 cutoff2_ = cutoff_ * cutoff_;
331 * Temporary memory for use within a single-frame calculation.
333 class PairDistanceModuleData : public TrajectoryAnalysisModuleData
337 * Reserves memory for the frame-local data.
339 PairDistanceModuleData(TrajectoryAnalysisModule* module,
340 const AnalysisDataParallelOptions& opt,
341 const SelectionCollection& selections,
343 const Selection& refSel,
345 TrajectoryAnalysisModuleData(module, opt, selections)
347 distArray_.resize(maxGroupCount);
348 countArray_.resize(maxGroupCount);
349 refCountArray_.resize(refGroupCount);
350 if (!refSel.isDynamic())
352 initRefCountArray(refSel);
356 void finish() override { finishDataHandles(); }
359 * Computes the number of positions in each group in \p refSel
360 * and stores them into `refCountArray_`.
362 void initRefCountArray(const Selection& refSel)
364 std::fill(refCountArray_.begin(), refCountArray_.end(), 0);
366 while (refPos < refSel.posCount())
368 const int refIndex = refSel.position(refPos).mappedId();
369 const int startPos = refPos;
371 while (refPos < refSel.posCount() && refSel.position(refPos).mappedId() == refIndex)
375 refCountArray_[refIndex] = refPos - startPos;
380 * Squared distance between each group
382 * One entry for each group pair for the current selection.
383 * Enough memory is allocated to fit the largest calculation selection.
384 * This is needed to support neighborhood searching, which may not
385 * return the pairs in order: for each group pair, we need to search
386 * through all the position pairs and update this array to find the
387 * minimum/maximum distance between them.
389 std::vector<real> distArray_;
391 * Number of pairs within the cutoff that have contributed to the value
394 * This is needed to identify whether there were any pairs inside the
395 * cutoff and whether there were additional pairs outside the cutoff
396 * that were not covered by the neihborhood search.
398 std::vector<int> countArray_;
400 * Number of positions within each reference group.
402 * This is used to more efficiently compute the total number of pairs
403 * (for comparison with `countArray_`), as otherwise these numbers
404 * would need to be recomputed for each selection.
406 std::vector<int> refCountArray_;
409 TrajectoryAnalysisModuleDataPointer PairDistance::startFrames(const AnalysisDataParallelOptions& opt,
410 const SelectionCollection& selections)
412 return TrajectoryAnalysisModuleDataPointer(new PairDistanceModuleData(
413 this, opt, selections, refGroupCount_, refSel_, maxGroupCount_));
416 void PairDistance::analyzeFrame(int frnr, const t_trxframe& fr, t_pbc* pbc, TrajectoryAnalysisModuleData* pdata)
418 AnalysisDataHandle dh = pdata->dataHandle(distances_);
419 const Selection& refSel = TrajectoryAnalysisModuleData::parallelSelection(refSel_);
420 const SelectionList& sel = TrajectoryAnalysisModuleData::parallelSelections(sel_);
421 PairDistanceModuleData& frameData = *static_cast<PairDistanceModuleData*>(pdata);
422 std::vector<real>& distArray = frameData.distArray_;
423 std::vector<int>& countArray = frameData.countArray_;
425 if (cutoff_ > 0.0 && refSel.isDynamic())
427 // Count the number of reference positions in each group, so that
428 // this does not need to be computed again for each selection.
429 // This is needed only if it is possible that the neighborhood search
430 // does not cover all the pairs, hence the cutoff > 0.0 check.
431 // If refSel is static, then the array contents are static as well,
432 // and it has been initialized in the constructor of the data object.
433 frameData.initRefCountArray(refSel);
435 const std::vector<int>& refCountArray = frameData.refCountArray_;
437 AnalysisNeighborhoodSearch nbsearch = nb_.initSearch(pbc, refSel);
438 dh.startFrame(frnr, fr.time);
439 for (size_t g = 0; g < sel.size(); ++g)
441 const int columnCount = distances_.columnCount(g);
442 std::fill(distArray.begin(), distArray.begin() + columnCount, initialDist2_);
443 std::fill(countArray.begin(), countArray.begin() + columnCount, 0);
445 // Accumulate the number of position pairs within the cutoff and the
446 // min/max distance for each group pair.
447 AnalysisNeighborhoodPairSearch pairSearch = nbsearch.startPairSearch(sel[g]);
448 AnalysisNeighborhoodPair pair;
449 while (pairSearch.findNextPair(&pair))
451 const SelectionPosition& refPos = refSel.position(pair.refIndex());
452 const SelectionPosition& selPos = sel[g].position(pair.testIndex());
453 const int refIndex = refPos.mappedId();
454 const int selIndex = selPos.mappedId();
455 const int index = selIndex * refGroupCount_ + refIndex;
456 const real r2 = pair.distance2();
457 if (distanceType_ == DistanceType::Min)
459 if (distArray[index] > r2)
461 distArray[index] = r2;
466 if (distArray[index] < r2)
468 distArray[index] = r2;
474 // If it is possible that positions outside the cutoff (or lack of
475 // them) affects the result, then we need to check whether there were
476 // any. This is necessary for two cases:
477 // - With max distances, if there are pairs outside the cutoff, then
478 // the computed distance should be equal to the cutoff instead of
479 // the largest distance that was found above.
480 // - With either distance type, if all pairs are outside the cutoff,
481 // then countArray must be updated so that the presence flag
482 // in the output data reflects the dynamic selection status, not
483 // whether something was inside the cutoff or not.
487 // Loop over groups in this selection (at start, selPos is always
488 // the first position in the next group).
489 while (selPos < sel[g].posCount())
491 // Count the number of positions in this group.
492 const int selIndex = sel[g].position(selPos).mappedId();
493 const int startPos = selPos;
495 while (selPos < sel[g].posCount() && sel[g].position(selPos).mappedId() == selIndex)
499 const int count = selPos - startPos;
500 // Check all group pairs that contain this group.
501 for (int i = 0; i < refGroupCount_; ++i)
503 const int index = selIndex * refGroupCount_ + i;
504 const int totalCount = refCountArray[i] * count;
505 // If there were positions outside the cutoff,
506 // update the distance if necessary and the count.
507 if (countArray[index] < totalCount)
509 if (distanceType_ == DistanceType::Max)
511 distArray[index] = cutoff2_;
513 countArray[index] = totalCount;
519 // Write the computed distances to the output data.
521 for (int i = 0; i < columnCount; ++i)
523 if (countArray[i] > 0)
525 dh.setPoint(i, std::sqrt(distArray[i]));
529 // If there are no contributing positions, write out the cutoff
531 dh.setPoint(i, cutoff_, false);
538 void PairDistance::finishAnalysis(int /*nframes*/) {}
540 void PairDistance::writeOutput() {}
546 const char PairDistanceInfo::name[] = "pairdist";
547 const char PairDistanceInfo::shortDescription[] =
548 "Calculate pairwise distances between groups of positions";
550 TrajectoryAnalysisModulePointer PairDistanceInfo::create()
552 return TrajectoryAnalysisModulePointer(new PairDistance);
555 } // namespace analysismodules