|
| 1 | +//! "Lazy" set-like data structure |
| 2 | +//! |
| 3 | +//! This data structure supports an in-place union operation similar to `BitSet`'s [`union`]. |
| 4 | +//! However, unlike `BitSet`'s `union`, which returns true only when new bits were added, this data |
| 5 | +//! structure's `union` will sometimes delay computation and return `true` immediately even when no |
| 6 | +//! new elements were added. The time savings are paid for with inaccuracy in the return value. |
| 7 | +//! |
| 8 | +//! Delayed computations are represented in a tree-like data structure, and each such tree belongs |
| 9 | +//! to a "batch." When a tree's number of nodes reaches a predetermined maximum, the tree is |
| 10 | +//! "flattened" into an instance of the underlying set type. When this condition occurs for one tree |
| 11 | +//! within a batch, the other trees in the batch are similarly flattened as needed. |
| 12 | +//! |
| 13 | +//! When the underlying set is a `BitSet` (or is `BitSet`-like), it is expected that the maximum |
| 14 | +//! tree size is the domain size. The intuition is: the cost of maintaining a tree of this size is |
| 15 | +//! proportional to the cost of maintaining a set instance. So when a batch's trees reach this size, |
| 16 | +//! delaying computations is likely not helping us. We should therefore cut our losses and do things |
| 17 | +//! the traditional way. |
| 18 | +//! |
| 19 | +//! Why is this data structure needed? @mwkmwkmwk exhibited a Rust program that caused |
| 20 | +//! `possible_borrower` to OOM/timeout ([10134]). The reasons for this were essentially the |
| 21 | +//! following: |
| 22 | +//! |
| 23 | +//! - Her example involved a large number of basic blocks. |
| 24 | +//! - A program's number of locals tends to be proportional to its number of basic blocks (as was |
| 25 | +//! the case in her example). |
| 26 | +//! - The work done by `possible_borrower` at each `join` in [`iterate_to_fixpoint`] (essentially, |
| 27 | +//! at each basic block) was proportional to the number of locals. |
| 28 | +//! |
| 29 | +//! One way out of this situation is to make the work done at each `join` not proportional to the |
| 30 | +//! number of locals. The current data structure achieves this for certain usage patterns, e.g., |
| 31 | +//! certain MIR dataflow problems. In particular, it avoids an OOM/timeout on @mwkmwkmwk's example |
| 32 | +//! and does not seem to degrade performance on Clippy's existing tests. But it is not sub-linear in |
| 33 | +//! the number of locals in a complexity-theoretic sense. At present, it is not clear whether such a |
| 34 | +//! solution exists. |
| 35 | +//! |
| 36 | +//! [10134]: https://github.com/rust-lang/rust-clippy/issues/10134 |
| 37 | +//! [`iterate_to_fixpoint`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/framework/engine/struct.Engine.html#method.iterate_to_fixpoint |
| 38 | +//! [`union`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html#method.union |
| 39 | +
|
| 40 | +use rustc_data_structures::{fx::FxHashSet, sync::Lrc}; |
| 41 | +use rustc_hir::def_id::DefId; |
| 42 | +use std::borrow::Cow; |
| 43 | +use std::fmt::Debug; |
| 44 | +use std::sync::{LazyLock, RwLock}; |
| 45 | + |
| 46 | +static FLATTENING: LazyLock<RwLock<FxHashSet<DefId>>> = LazyLock::new(|| RwLock::new(FxHashSet::default())); |
| 47 | + |
| 48 | +/// A set-like type with `insert` and `union` operations similar to those of `BitSet` |
| 49 | +/// |
| 50 | +/// [`BitSet`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html |
| 51 | +pub trait Set { |
| 52 | + /// A value of type `Self::Param` is passed to each trait method invocation. |
| 53 | + type Param: Clone + Debug + Eq; |
| 54 | + |
| 55 | + /// The type of the elements in the set. |
| 56 | + type Elem: Copy + Debug + Eq; |
| 57 | + |
| 58 | + /// Returns the id of the "batch" to which this set belongs. When the set corresponds to a |
| 59 | + /// `mir::Body`, it is expected that the batch id is the `DefId` of the corresponding function. |
| 60 | + fn batch_id(param: &Self::Param) -> DefId; |
| 61 | + |
| 62 | + /// Returns the set's maximum tree size. Trees that exceed this size are flattened into |
| 63 | + /// instances of `Self`. |
| 64 | + fn tree_size_max(param: &Self::Param) -> usize; |
| 65 | + |
| 66 | + /// Creates a new instance of `Self`. |
| 67 | + fn new(param: &Self::Param) -> Self; |
| 68 | + |
| 69 | + /// Inserts `elem`. Returns whether the set has changed. |
| 70 | + fn insert(&mut self, elem: Self::Elem, param: &Self::Param) -> bool; |
| 71 | + |
| 72 | + /// Inserts all elements from `other` and returns `true` if `self` changed (i.e., if new |
| 73 | + /// elements were added). |
| 74 | + fn union(&mut self, other: &Self, param: &Self::Param) -> bool; |
| 75 | +} |
| 76 | + |
| 77 | +#[derive(Clone, Debug, Eq, PartialEq)] |
| 78 | +pub struct LazySet<S: Set> { |
| 79 | + inner: Inner<S>, |
| 80 | + param: <S as Set>::Param, |
| 81 | +} |
| 82 | + |
| 83 | +#[derive(Clone, Debug, Eq, PartialEq)] |
| 84 | +enum Inner<S: Set> { |
| 85 | + Flattened(S), |
| 86 | + Tree(Option<Lrc<Tree<<S as Set>::Elem>>>), |
| 87 | +} |
| 88 | + |
| 89 | +impl<S: Set> Drop for Inner<S> { |
| 90 | + fn drop(&mut self) { |
| 91 | + if let Inner::Tree(tree) = self && let Some(tree) = tree.take() { |
| 92 | + let mut stack = Vec::new(); |
| 93 | + stack.push(tree); |
| 94 | + while let Some(tree) = stack.pop() { |
| 95 | + if let Ok(Tree::Union { left, right, .. }) = Lrc::try_unwrap(tree) { |
| 96 | + stack.push(left); |
| 97 | + stack.push(right); |
| 98 | + } |
| 99 | + } |
| 100 | + } |
| 101 | + } |
| 102 | +} |
| 103 | + |
| 104 | +impl<S> LazySet<S> |
| 105 | +where |
| 106 | + S: Clone + Debug + Set, |
| 107 | +{ |
| 108 | + pub fn new(param: <S as Set>::Param) -> Self { |
| 109 | + Self { |
| 110 | + inner: Inner::Tree(None), |
| 111 | + param, |
| 112 | + } |
| 113 | + } |
| 114 | + |
| 115 | + /// Similar to `BitSet`'s [`insert`]. However, this `insert` returns `true` when the set changed |
| 116 | + /// _or_ computation of the new set was delayed. |
| 117 | + /// |
| 118 | + /// [`insert`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html#method.insert |
| 119 | + pub fn insert(&mut self, elem: <S as Set>::Elem) -> bool { |
| 120 | + match &mut self.inner { |
| 121 | + Inner::Flattened(flattened) => flattened.insert(elem, &self.param), |
| 122 | + Inner::Tree(tree) => { |
| 123 | + let tree = if let Some(tree) = tree { |
| 124 | + Tree::insert(tree, elem) |
| 125 | + } else { |
| 126 | + Lrc::new(Tree::Singleton(elem)) |
| 127 | + }; |
| 128 | + self.set_inner(Inner::Tree(Some(tree))); |
| 129 | + true |
| 130 | + }, |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + /// Similar to `BitSet`'s [`union`]. However, this `union` returns `true` when new elements were |
| 135 | + /// added _or_ computation of the new set was delayed. |
| 136 | + /// |
| 137 | + /// [`union`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html#method.union |
| 138 | + pub fn union(&mut self, other: &Self) -> bool { |
| 139 | + if matches!((&self.inner, &other.inner), (Inner::Tree(Some(_)), Inner::Flattened(_))) { |
| 140 | + self.flatten_in_place(); |
| 141 | + } |
| 142 | + |
| 143 | + match &mut self.inner { |
| 144 | + Inner::Flattened(our_flattened) => match &other.inner { |
| 145 | + Inner::Flattened(their_flattened) => our_flattened.union(their_flattened, &self.param), |
| 146 | + Inner::Tree(None) => false, |
| 147 | + Inner::Tree(Some(their_tree)) => { |
| 148 | + let mut changed = false; |
| 149 | + their_tree.for_each(&mut |elem: &<S as Set>::Elem| { |
| 150 | + changed |= our_flattened.insert(*elem, &self.param); |
| 151 | + }); |
| 152 | + changed |
| 153 | + }, |
| 154 | + }, |
| 155 | + Inner::Tree(None) => match &other.inner { |
| 156 | + Inner::Flattened(_) | Inner::Tree(Some(_)) => { |
| 157 | + self.set_inner(other.inner.clone()); |
| 158 | + true |
| 159 | + }, |
| 160 | + Inner::Tree(None) => false, |
| 161 | + }, |
| 162 | + Inner::Tree(Some(our_tree)) => match &other.inner { |
| 163 | + Inner::Flattened(_) => unreachable!(), |
| 164 | + Inner::Tree(None) => false, |
| 165 | + Inner::Tree(Some(their_tree)) => { |
| 166 | + let (tree, changed) = Tree::union(our_tree, their_tree); |
| 167 | + self.set_inner(Inner::Tree(Some(tree))); |
| 168 | + changed |
| 169 | + }, |
| 170 | + }, |
| 171 | + } |
| 172 | + } |
| 173 | + |
| 174 | + fn set_inner(&mut self, inner: Inner<S>) { |
| 175 | + self.inner = inner; |
| 176 | + if let Inner::Tree(Some(tree)) = &self.inner { |
| 177 | + if tree.size() <= S::tree_size_max(&self.param) { |
| 178 | + let lock = FLATTENING.read().unwrap(); |
| 179 | + if !lock.contains(&S::batch_id(&self.param)) { |
| 180 | + return; |
| 181 | + } |
| 182 | + drop(lock); |
| 183 | + self.flatten_in_place(); |
| 184 | + } else { |
| 185 | + let mut lock = FLATTENING.write().unwrap(); |
| 186 | + lock.insert(S::batch_id(&self.param)); |
| 187 | + drop(lock); |
| 188 | + self.flatten_in_place(); |
| 189 | + } |
| 190 | + } |
| 191 | + } |
| 192 | + |
| 193 | + fn flatten_in_place(&mut self) { |
| 194 | + if !matches!(self.inner, Inner::Flattened(_)) { |
| 195 | + self.inner = Inner::Flattened(self.flatten().into_owned()); |
| 196 | + } |
| 197 | + } |
| 198 | + |
| 199 | + pub fn flatten(&self) -> Cow<'_, S> { |
| 200 | + match &self.inner { |
| 201 | + Inner::Flattened(flattened) => Cow::Borrowed(flattened), |
| 202 | + Inner::Tree(tree) => { |
| 203 | + let mut flattened = S::new(&self.param); |
| 204 | + if let Some(tree) = tree { |
| 205 | + tree.for_each(&mut |elem: &<S as Set>::Elem| { |
| 206 | + flattened.insert(*elem, &self.param); |
| 207 | + }); |
| 208 | + } |
| 209 | + Cow::Owned(flattened) |
| 210 | + }, |
| 211 | + } |
| 212 | + } |
| 213 | +} |
| 214 | + |
| 215 | +#[derive(Clone, Debug, Eq, PartialEq)] |
| 216 | +enum Tree<T> { |
| 217 | + Singleton(T), |
| 218 | + Union { |
| 219 | + size: usize, |
| 220 | + left: Lrc<Tree<T>>, |
| 221 | + right: Lrc<Tree<T>>, |
| 222 | + }, |
| 223 | +} |
| 224 | + |
| 225 | +impl<T> Tree<T> { |
| 226 | + fn size(&self) -> usize { |
| 227 | + match self { |
| 228 | + Self::Singleton(_) => 1, |
| 229 | + Self::Union { size, .. } => *size, |
| 230 | + } |
| 231 | + } |
| 232 | + |
| 233 | + fn insert(this: &Lrc<Tree<T>>, elem: T) -> Lrc<Self> { |
| 234 | + Lrc::new(Self::Union { |
| 235 | + size: 1 + this.size() + 1, |
| 236 | + left: this.clone(), |
| 237 | + right: Lrc::new(Self::Singleton(elem)), |
| 238 | + }) |
| 239 | + } |
| 240 | + |
| 241 | + fn union(this: &Lrc<Tree<T>>, other: &Lrc<Tree<T>>) -> (Lrc<Self>, bool) { |
| 242 | + if Lrc::ptr_eq(this, other) { |
| 243 | + (this.clone(), false) |
| 244 | + } else { |
| 245 | + ( |
| 246 | + Lrc::new(Tree::Union { |
| 247 | + size: 1 + this.size() + other.size(), |
| 248 | + left: this.clone(), |
| 249 | + right: other.clone(), |
| 250 | + }), |
| 251 | + true, |
| 252 | + ) |
| 253 | + } |
| 254 | + } |
| 255 | + |
| 256 | + fn for_each(&self, f: &mut impl FnMut(&T)) { |
| 257 | + let mut stack = Vec::new(); |
| 258 | + stack.push(self); |
| 259 | + while let Some(tree) = stack.pop() { |
| 260 | + match tree { |
| 261 | + Self::Singleton(elem) => f(elem), |
| 262 | + Self::Union { left, right, .. } => { |
| 263 | + stack.push(left); |
| 264 | + stack.push(right); |
| 265 | + }, |
| 266 | + } |
| 267 | + } |
| 268 | + } |
| 269 | +} |
0 commit comments