2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2017,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \libinternal \file
37 * Implements gmx::UnionFinder and gmx::MappedUnionFinder.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \ingroup module_trajectoryanalysis
42 #ifndef GMX_TRAJECTORYANALYSIS_UNIONFIND_H
43 #define GMX_TRAJECTORYANALYSIS_UNIONFIND_H
49 #include "gromacs/utility/arrayref.h"
50 #include "gromacs/utility/gmxassert.h"
55 /*! \libinternal \brief
56 * Union-find data structure for keeping track of disjoint sets.
58 * Union-find keeps track of a number of items, represented here by continuous
59 * integer indices starting at zero, and supports the following operations:
60 * - Initialization puts each item into a set of its own.
61 * - Given two items, merge the sets that contain these items.
62 * - Given an item, find a representative item that is in the same set, such
63 * that queries for items in the same set yield the same value.
64 * Merging and querying is supported in amortized constant time.
66 * Note that in order to achieve the amortized behavior, querying the structure
67 * modifies the internal state, but does not alter the externally visible
70 * \ingroup module_trajectoryanalysis
76 * Initializes `count` items, putting each in its own set.
82 parent_.reserve(count);
84 parent_.resize(count);
85 rank_.resize(count, 0);
86 std::iota(parent_.begin(), parent_.end(), 0);
89 * Merges sets that contain two given items.
91 * If the items are already in the same set, nothing happens.
93 void merge(int item1, int item2)
95 GMX_ASSERT(item1 >= 0 && item1 < count(), "Input index out of range");
96 GMX_ASSERT(item2 >= 0 && item2 < count(), "Input index out of range");
97 const int root1 = findRootAndCompressPath(item1);
98 const int root2 = findRootAndCompressPath(item2);
101 mergeRoots(root1, root2);
105 * Returns a representative item from the set containing `item`.
107 int representativeItem(int item)
109 GMX_ASSERT(item >= 0 && item < count(), "Input index out of range");
110 return findRootAndCompressPath(item);
113 * Returns the sizes of all sets (in arbitrary order).
115 std::vector<int> allSizes()
117 const int count = parent_.size();
118 std::vector<int> result(count, 0);
119 for (int i = 0; i < count; ++i)
121 ++result[findRootAndCompressPath(i)];
123 result.erase(std::remove(result.begin(), result.end(), 0), result.end());
129 int count() const { return parent_.size(); }
130 int findRootAndCompressPath(int i)
132 while (parent_[i] != i)
136 parent_[prev] = parent_[i];
140 void mergeRoots(int root1, int root2)
142 if (rank_[root1] > rank_[root2])
144 parent_[root2] = root1;
146 else if (rank_[root2] > rank_[root1])
148 parent_[root1] = root2;
152 parent_[root1] = root2;
158 * Parent item for each item in the tree representing the set.
160 * Root items are parents of themselves, and are the reprensentative
161 * items of their sets.
163 std::vector<int> parent_;
164 //! Worst-case height for each root (as if no compression was done).
165 std::vector<int> rank_;
168 /*! \libinternal \brief
169 * Extension of UnionFind that supports non-consecutive integer indices as
172 * Sometimes, it is more convenient to operate on a set of integers that do not
173 * start at zero and are not consecutive as UnionFind expects. This class
174 * implements a mapping on top of UnionFind such that this is possible.
176 * The current implementation assumes that the indices are bounded between zero
177 * and some reasonably small integer, i.e., the memory usage depends on the
178 * largest index number, not just on the number of items.
180 * \ingroup module_trajectoryanalysis
182 class MappedUnionFinder
186 * Initializes the finder with indices.
188 * The size of `indices` sets the number of input items, and each
189 * unique value in `indices` maps to a single internal item.
190 * If multiple indices are the same, then these items are considered
193 void initWithGroupIndices(ArrayRef<const int> indices)
197 if (!indices.empty())
199 const int maxIndex = *std::max_element(indices.begin(), indices.end());
200 mapping_.resize(maxIndex + 1, -1);
201 for (int item : indices)
203 GMX_ASSERT(item >= 0, "Negative group numbers not supported");
204 if (mapping_[item] == -1)
206 mapping_[item] = groupCount;
211 finder_.init(groupCount);
214 * Returns a reprensetative value for an item that is unique for each
217 * `group` should be one of the values that were passed in as an index
218 * to initWithGroupIndices().
219 * The return value is an internal index that has no simple relation to
222 int representativeValue(int group)
224 GMX_ASSERT(group >= 0 && group < maxGroupNumber(), "Input value out of range");
225 GMX_ASSERT(mapping_[group] != -1, "Input value not in initialization set");
226 return finder_.representativeItem(mapping_[group]);
229 * Merges sets that contain two given items.
231 * If the items are already in the same set, nothing happens.
232 * Each input value should be one of the values that were passed in as
233 * an index to initWithGroupIndices().
235 void mergeGroups(int group1, int group2)
237 GMX_ASSERT(group1 >= 0 && group1 < maxGroupNumber(), "Input value out of range");
238 GMX_ASSERT(group2 >= 0 && group2 < maxGroupNumber(), "Input value out of range");
239 GMX_ASSERT(mapping_[group1] != -1, "Input value not in initialization set");
240 GMX_ASSERT(mapping_[group2] != -1, "Input value not in initialization set");
241 finder_.merge(mapping_[group1], mapping_[group2]);
244 * Returns the sizes of all sets (in arbitrary order).
246 * If there were multiple identical indices passed to
247 * initWithGroupIndices(), these are only counted as one when
248 * computing the sizes.
250 std::vector<int> allSizes() { return finder_.allSizes(); }
253 int maxGroupNumber() const { return mapping_.size(); }
256 //! Mapping from input indices to zero-based indices used by finder_.
257 std::vector<int> mapping_;