Skip to content

Commit 4bdd15f

Browse files
committed
LazySet implementation
1 parent ef5a545 commit 4bdd15f

File tree

5 files changed

+359
-30
lines changed

5 files changed

+359
-30
lines changed

clippy_utils/src/mir/bit_matrix.rs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use super::lazy_set::Set;
2+
use rustc_hir::def_id::DefId;
3+
use rustc_index::{bit_set::BitSet, vec::IndexVec};
4+
use rustc_middle::mir::{self, Local};
5+
6+
#[derive(Clone, Debug, Eq, PartialEq)]
7+
pub struct Param {
8+
def_id: DefId,
9+
domain_size: usize,
10+
}
11+
12+
impl Param {
13+
pub fn new(body: &mir::Body<'_>) -> Self {
14+
Self {
15+
def_id: body.source.def_id(),
16+
domain_size: body.local_decls.len(),
17+
}
18+
}
19+
}
20+
21+
#[derive(Clone, Debug, Eq, PartialEq)]
22+
pub struct BitMatrix(pub IndexVec<Local, Option<BitSet<Local>>>);
23+
24+
impl Set for BitMatrix {
25+
type Param = Param;
26+
type Elem = (Local, Local);
27+
28+
fn batch_id(param: &Self::Param) -> DefId {
29+
param.def_id
30+
}
31+
32+
fn tree_size_max(param: &Self::Param) -> usize {
33+
param.domain_size * param.domain_size
34+
}
35+
36+
fn new(param: &Self::Param) -> Self {
37+
Self(IndexVec::from_elem_n(None, param.domain_size))
38+
}
39+
40+
fn insert(&mut self, elem: (Local, Local), param: &Self::Param) -> bool {
41+
self.0[elem.0]
42+
.get_or_insert_with(|| BitSet::new_empty(param.domain_size))
43+
.insert(elem.1)
44+
}
45+
46+
fn union(&mut self, other: &BitMatrix, param: &Self::Param) -> bool {
47+
let mut changed = false;
48+
for (borrowed, borrowers) in other.0.iter_enumerated() {
49+
if let Some(borrowers) = borrowers {
50+
changed |= self.0[borrowed]
51+
.get_or_insert_with(|| BitSet::new_empty(param.domain_size))
52+
.union(borrowers);
53+
}
54+
}
55+
changed
56+
}
57+
}

clippy_utils/src/mir/lazy_set.rs

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
//! "Lazy" set-like data structure
2+
//!
3+
//! This data structure supports an in-place union operation similar to `BitSet`'s [`union`].
4+
//! However, unlike `BitSet`'s `union`, which returns true only when new bits were added, this data
5+
//! structure's `union` will sometimes delay computation and return `true` immediately even when no
6+
//! new elements were added. The time savings are paid for with inaccuracy in the return value.
7+
//!
8+
//! Delayed computations are represented in a tree-like data structure, and each such tree belongs
9+
//! to a "batch." When a tree's number of nodes reaches a predetermined maximum, the tree is
10+
//! "flattened" into an instance of the underlying set type. When this condition occurs for one tree
11+
//! within a batch, the other trees in the batch are similarly flattened as needed.
12+
//!
13+
//! When the underlying set is a `BitSet` (or is `BitSet`-like), it is expected that the maximum
14+
//! tree size is the domain size. The intuition is: the cost of maintaining a tree of this size is
15+
//! proportional to the cost of maintaining a set instance. So when a batch's trees reach this size,
16+
//! delaying computations is likely not helping us. We should therefore cut our losses and do things
17+
//! the traditional way.
18+
//!
19+
//! Why is this data structure needed? @mwkmwkmwk exhibited a Rust program that caused
20+
//! `possible_borrower` to OOM/timeout ([10134]). The reasons for this were essentially the
21+
//! following:
22+
//!
23+
//! - Her example involved a large number of basic blocks.
24+
//! - A program's number of locals tends to be proportional to its number of basic blocks (as was
25+
//! the case in her example).
26+
//! - The work done by `possible_borrower` at each `join` in [`iterate_to_fixpoint`] (essentially,
27+
//! at each basic block) was proportional to the number of locals.
28+
//!
29+
//! One way out of this situation is to make the work done at each `join` not proportional to the
30+
//! number of locals. The current data structure achieves this for certain usage patterns, e.g.,
31+
//! certain MIR dataflow problems. In particular, it avoids an OOM/timeout on @mwkmwkmwk's example
32+
//! and does not seem to degrade performance on Clippy's existing tests. But it is not sub-linear in
33+
//! the number of locals in a complexity-theoretic sense. At present, it is not clear whether such a
34+
//! solution exists.
35+
//!
36+
//! [10134]: https://github.com/rust-lang/rust-clippy/issues/10134
37+
//! [`iterate_to_fixpoint`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_mir_dataflow/framework/engine/struct.Engine.html#method.iterate_to_fixpoint
38+
//! [`union`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html#method.union
39+
40+
use rustc_data_structures::{fx::FxHashSet, sync::Lrc};
41+
use rustc_hir::def_id::DefId;
42+
use std::borrow::Cow;
43+
use std::fmt::Debug;
44+
use std::sync::{LazyLock, RwLock};
45+
46+
static FLATTENING: LazyLock<RwLock<FxHashSet<DefId>>> = LazyLock::new(|| RwLock::new(FxHashSet::default()));
47+
48+
/// A set-like type with `insert` and `union` operations similar to those of `BitSet`
49+
///
50+
/// [`BitSet`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html
51+
pub trait Set {
52+
/// A value of type `Self::Param` is passed to each trait method invocation.
53+
type Param: Clone + Debug + Eq;
54+
55+
/// The type of the elements in the set.
56+
type Elem: Copy + Debug + Eq;
57+
58+
/// Returns the id of the "batch" to which this set belongs. When the set corresponds to a
59+
/// `mir::Body`, it is expected that the batch id is the `DefId` of the corresponding function.
60+
fn batch_id(param: &Self::Param) -> DefId;
61+
62+
/// Returns the set's maximum tree size. Trees that exceed this size are flattened into
63+
/// instances of `Self`.
64+
fn tree_size_max(param: &Self::Param) -> usize;
65+
66+
/// Creates a new instance of `Self`.
67+
fn new(param: &Self::Param) -> Self;
68+
69+
/// Inserts `elem`. Returns whether the set has changed.
70+
fn insert(&mut self, elem: Self::Elem, param: &Self::Param) -> bool;
71+
72+
/// Inserts all elements from `other` and returns `true` if `self` changed (i.e., if new
73+
/// elements were added).
74+
fn union(&mut self, other: &Self, param: &Self::Param) -> bool;
75+
}
76+
77+
#[derive(Clone, Debug, Eq, PartialEq)]
78+
pub struct LazySet<S: Set> {
79+
inner: Inner<S>,
80+
param: <S as Set>::Param,
81+
}
82+
83+
#[derive(Clone, Debug, Eq, PartialEq)]
84+
enum Inner<S: Set> {
85+
Flattened(S),
86+
Tree(Option<Lrc<Tree<<S as Set>::Elem>>>),
87+
}
88+
89+
impl<S: Set> Drop for Inner<S> {
90+
fn drop(&mut self) {
91+
if let Inner::Tree(tree) = self && let Some(tree) = tree.take() {
92+
let mut stack = Vec::new();
93+
stack.push(tree);
94+
while let Some(tree) = stack.pop() {
95+
if let Ok(Tree::Union { left, right, .. }) = Lrc::try_unwrap(tree) {
96+
stack.push(left);
97+
stack.push(right);
98+
}
99+
}
100+
}
101+
}
102+
}
103+
104+
impl<S> LazySet<S>
105+
where
106+
S: Clone + Debug + Set,
107+
{
108+
pub fn new(param: <S as Set>::Param) -> Self {
109+
Self {
110+
inner: Inner::Tree(None),
111+
param,
112+
}
113+
}
114+
115+
/// Similar to `BitSet`'s [`insert`]. However, this `insert` returns `true` when the set changed
116+
/// _or_ computation of the new set was delayed.
117+
///
118+
/// [`insert`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html#method.insert
119+
pub fn insert(&mut self, elem: <S as Set>::Elem) -> bool {
120+
match &mut self.inner {
121+
Inner::Flattened(flattened) => flattened.insert(elem, &self.param),
122+
Inner::Tree(tree) => {
123+
let tree = if let Some(tree) = tree {
124+
Tree::insert(tree, elem)
125+
} else {
126+
Lrc::new(Tree::Singleton(elem))
127+
};
128+
self.set_inner(Inner::Tree(Some(tree)));
129+
true
130+
},
131+
}
132+
}
133+
134+
/// Similar to `BitSet`'s [`union`]. However, this `union` returns `true` when new elements were
135+
/// added _or_ computation of the new set was delayed.
136+
///
137+
/// [`union`]: https://doc.rust-lang.org/nightly/nightly-rustc/rustc_index/bit_set/struct.BitSet.html#method.union
138+
pub fn union(&mut self, other: &Self) -> bool {
139+
if matches!((&self.inner, &other.inner), (Inner::Tree(Some(_)), Inner::Flattened(_))) {
140+
self.flatten_in_place();
141+
}
142+
143+
match &mut self.inner {
144+
Inner::Flattened(our_flattened) => match &other.inner {
145+
Inner::Flattened(their_flattened) => our_flattened.union(their_flattened, &self.param),
146+
Inner::Tree(None) => false,
147+
Inner::Tree(Some(their_tree)) => {
148+
let mut changed = false;
149+
their_tree.for_each(&mut |elem: &<S as Set>::Elem| {
150+
changed |= our_flattened.insert(*elem, &self.param);
151+
});
152+
changed
153+
},
154+
},
155+
Inner::Tree(None) => match &other.inner {
156+
Inner::Flattened(_) | Inner::Tree(Some(_)) => {
157+
self.set_inner(other.inner.clone());
158+
true
159+
},
160+
Inner::Tree(None) => false,
161+
},
162+
Inner::Tree(Some(our_tree)) => match &other.inner {
163+
Inner::Flattened(_) => unreachable!(),
164+
Inner::Tree(None) => false,
165+
Inner::Tree(Some(their_tree)) => {
166+
let (tree, changed) = Tree::union(our_tree, their_tree);
167+
self.set_inner(Inner::Tree(Some(tree)));
168+
changed
169+
},
170+
},
171+
}
172+
}
173+
174+
fn set_inner(&mut self, inner: Inner<S>) {
175+
self.inner = inner;
176+
if let Inner::Tree(Some(tree)) = &self.inner {
177+
if tree.size() <= S::tree_size_max(&self.param) {
178+
let lock = FLATTENING.read().unwrap();
179+
if !lock.contains(&S::batch_id(&self.param)) {
180+
return;
181+
}
182+
drop(lock);
183+
self.flatten_in_place();
184+
} else {
185+
let mut lock = FLATTENING.write().unwrap();
186+
lock.insert(S::batch_id(&self.param));
187+
drop(lock);
188+
self.flatten_in_place();
189+
}
190+
}
191+
}
192+
193+
fn flatten_in_place(&mut self) {
194+
if !matches!(self.inner, Inner::Flattened(_)) {
195+
self.inner = Inner::Flattened(self.flatten().into_owned());
196+
}
197+
}
198+
199+
pub fn flatten(&self) -> Cow<'_, S> {
200+
match &self.inner {
201+
Inner::Flattened(flattened) => Cow::Borrowed(flattened),
202+
Inner::Tree(tree) => {
203+
let mut flattened = S::new(&self.param);
204+
if let Some(tree) = tree {
205+
tree.for_each(&mut |elem: &<S as Set>::Elem| {
206+
flattened.insert(*elem, &self.param);
207+
});
208+
}
209+
Cow::Owned(flattened)
210+
},
211+
}
212+
}
213+
}
214+
215+
#[derive(Clone, Debug, Eq, PartialEq)]
216+
enum Tree<T> {
217+
Singleton(T),
218+
Union {
219+
size: usize,
220+
left: Lrc<Tree<T>>,
221+
right: Lrc<Tree<T>>,
222+
},
223+
}
224+
225+
impl<T> Tree<T> {
226+
fn size(&self) -> usize {
227+
match self {
228+
Self::Singleton(_) => 1,
229+
Self::Union { size, .. } => *size,
230+
}
231+
}
232+
233+
fn insert(this: &Lrc<Tree<T>>, elem: T) -> Lrc<Self> {
234+
Lrc::new(Self::Union {
235+
size: 1 + this.size() + 1,
236+
left: this.clone(),
237+
right: Lrc::new(Self::Singleton(elem)),
238+
})
239+
}
240+
241+
fn union(this: &Lrc<Tree<T>>, other: &Lrc<Tree<T>>) -> (Lrc<Self>, bool) {
242+
if Lrc::ptr_eq(this, other) {
243+
(this.clone(), false)
244+
} else {
245+
(
246+
Lrc::new(Tree::Union {
247+
size: 1 + this.size() + other.size(),
248+
left: this.clone(),
249+
right: other.clone(),
250+
}),
251+
true,
252+
)
253+
}
254+
}
255+
256+
fn for_each(&self, f: &mut impl FnMut(&T)) {
257+
let mut stack = Vec::new();
258+
stack.push(self);
259+
while let Some(tree) = stack.pop() {
260+
match tree {
261+
Self::Singleton(elem) => f(elem),
262+
Self::Union { left, right, .. } => {
263+
stack.push(left);
264+
stack.push(right);
265+
},
266+
}
267+
}
268+
}
269+
}

clippy_utils/src/mir/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@ use rustc_middle::mir::{
55
};
66
use rustc_middle::ty::TyCtxt;
77

8+
mod bit_matrix;
9+
10+
mod lazy_set;
11+
812
mod possible_borrower;
913
pub use possible_borrower::PossibleBorrowerMap;
1014

0 commit comments

Comments
 (0)