Skip to content

Commit dd1ab84

Browse files
committed
rustc_query_system: use more space-efficient edges representation
Use a single vector of edges rather than a per-node vector. There is a small hit to instruction counts (< 0.5%), but the memory savings make up for it.
1 parent ea47269 commit dd1ab84

File tree

2 files changed

+103
-52
lines changed

2 files changed

+103
-52
lines changed

compiler/rustc_query_system/src/dep_graph/graph.rs

Lines changed: 91 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use std::env;
1515
use std::hash::Hash;
1616
use std::marker::PhantomData;
1717
use std::mem;
18+
use std::ops::Range;
1819
use std::sync::atomic::Ordering::Relaxed;
1920

2021
use super::debug::EdgeFilter;
@@ -143,42 +144,48 @@ impl<K: DepKind> DepGraph<K> {
143144
let node_count = data.hybrid_indices.len();
144145

145146
let mut nodes = Vec::with_capacity(node_count);
146-
let mut edges = Vec::with_capacity(edge_count);
147-
148-
for (index, &hybrid_index) in data.hybrid_indices.iter_enumerated() {
149-
let src = index.index();
147+
let mut edge_list_indices = Vec::with_capacity(node_count);
148+
let mut edge_list_data = Vec::with_capacity(edge_count);
149+
edge_list_data.extend(data.unshared_edges.iter().map(|i| i.index()));
150150

151+
for &hybrid_index in data.hybrid_indices.iter() {
151152
match hybrid_index.into() {
152153
HybridIndex::New(new_index) => {
153-
let new = &data.new;
154-
nodes.push(new.nodes[new_index]);
155-
edges.extend(new.edges[new_index].iter().map(|dst| (src, dst.index())));
154+
nodes.push(data.new.nodes[new_index]);
155+
let edges = &data.new.edges[new_index];
156+
edge_list_indices.push((edges.start.index(), edges.end.index()));
156157
}
157158
HybridIndex::Red(red_index) => {
158-
let red = &data.red;
159-
nodes.push(previous.index_to_node(red.node_indices[red_index]));
160-
edges.extend(red.edges[red_index].iter().map(|dst| (src, dst.index())));
159+
nodes.push(previous.index_to_node(data.red.node_indices[red_index]));
160+
let edges = &data.red.edges[red_index];
161+
edge_list_indices.push((edges.start.index(), edges.end.index()));
161162
}
162163
HybridIndex::LightGreen(lg_index) => {
163-
let lg = &data.light_green;
164-
nodes.push(previous.index_to_node(lg.node_indices[lg_index]));
165-
edges.extend(lg.edges[lg_index].iter().map(|dst| (src, dst.index())));
164+
nodes.push(previous.index_to_node(data.light_green.node_indices[lg_index]));
165+
let edges = &data.light_green.edges[lg_index];
166+
edge_list_indices.push((edges.start.index(), edges.end.index()));
166167
}
167168
HybridIndex::DarkGreen(prev_index) => {
168169
nodes.push(previous.index_to_node(prev_index));
170+
169171
let edges_iter = previous
170172
.edge_targets_from(prev_index)
171173
.iter()
172-
.map(|&dst| (src, prev_index_to_index[dst].unwrap().index()));
173-
edges.extend(edges_iter);
174+
.map(|&dst| prev_index_to_index[dst].unwrap().index());
175+
176+
let start = edge_list_data.len();
177+
edge_list_data.extend(edges_iter);
178+
let end = edge_list_data.len();
179+
edge_list_indices.push((start, end));
174180
}
175181
}
176182
}
177183

178184
debug_assert_eq!(nodes.len(), node_count);
179-
debug_assert_eq!(edges.len(), edge_count);
185+
debug_assert_eq!(edge_list_indices.len(), node_count);
186+
debug_assert_eq!(edge_list_data.len(), edge_count);
180187

181-
DepGraphQuery::new(&nodes[..], &edges[..])
188+
DepGraphQuery::new(&nodes[..], &edge_list_indices[..], &edge_list_data[..])
182189
}
183190

184191
pub fn assert_ignored(&self) {
@@ -561,11 +568,7 @@ impl<K: DepKind> DepGraph<K> {
561568
let previous = &data.previous;
562569
let data = data.current.data.lock();
563570

564-
// Linearly scanning each collection is a bit faster than scanning
565-
// `hybrid_indices` and bouncing around the different collections.
566-
let mut edge_count = data.new.edges.iter().map(|e| e.len()).sum::<usize>()
567-
+ data.red.edges.iter().map(|e| e.len()).sum::<usize>()
568-
+ data.light_green.edges.iter().map(|e| e.len()).sum::<usize>();
571+
let mut edge_count = data.unshared_edges.len();
569572

570573
for &hybrid_index in data.hybrid_indices.iter() {
571574
if let HybridIndex::DarkGreen(prev_index) = hybrid_index.into() {
@@ -591,46 +594,44 @@ impl<K: DepKind> DepGraph<K> {
591594
let mut fingerprints = IndexVec::with_capacity(node_count);
592595
let mut edge_list_indices = IndexVec::with_capacity(node_count);
593596
let mut edge_list_data = Vec::with_capacity(edge_count);
594-
595-
fn add_edges<'a, I: Iterator<Item = &'a DepNodeIndex>>(
596-
edge_list_indices: &mut IndexVec<SerializedDepNodeIndex, (u32, u32)>,
597-
edge_list_data: &mut Vec<SerializedDepNodeIndex>,
598-
iter: I,
599-
) {
600-
let start = edge_list_data.len() as u32;
601-
edge_list_data.extend(iter.map(|i| SDNI::new(i.index())));
602-
let end = edge_list_data.len() as u32;
603-
edge_list_indices.push((start, end));
604-
};
597+
edge_list_data.extend(data.unshared_edges.iter().map(|i| SDNI::new(i.index())));
605598

606599
for &hybrid_index in data.hybrid_indices.iter() {
607600
match hybrid_index.into() {
608601
HybridIndex::New(i) => {
609602
let new = &data.new;
610603
nodes.push(new.nodes[i]);
611604
fingerprints.push(new.fingerprints[i]);
612-
add_edges(&mut edge_list_indices, &mut edge_list_data, new.edges[i].iter());
605+
let edges = &new.edges[i];
606+
edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
613607
}
614608
HybridIndex::Red(i) => {
615609
let red = &data.red;
616610
nodes.push(previous.index_to_node(red.node_indices[i]));
617611
fingerprints.push(red.fingerprints[i]);
618-
add_edges(&mut edge_list_indices, &mut edge_list_data, red.edges[i].iter());
612+
let edges = &red.edges[i];
613+
edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
619614
}
620615
HybridIndex::LightGreen(i) => {
621616
let lg = &data.light_green;
622617
nodes.push(previous.index_to_node(lg.node_indices[i]));
623618
fingerprints.push(previous.fingerprint_by_index(lg.node_indices[i]));
624-
add_edges(&mut edge_list_indices, &mut edge_list_data, lg.edges[i].iter());
619+
let edges = &lg.edges[i];
620+
edge_list_indices.push((edges.start.as_u32(), edges.end.as_u32()));
625621
}
626622
HybridIndex::DarkGreen(prev_index) => {
627623
nodes.push(previous.index_to_node(prev_index));
628624
fingerprints.push(previous.fingerprint_by_index(prev_index));
625+
629626
let edges_iter = previous
630627
.edge_targets_from(prev_index)
631628
.iter()
632629
.map(|&dst| prev_index_to_index[dst].as_ref().unwrap());
633-
add_edges(&mut edge_list_indices, &mut edge_list_data, edges_iter);
630+
631+
let start = edge_list_data.len() as u32;
632+
edge_list_data.extend(edges_iter.map(|i| SDNI::new(i.index())));
633+
let end = edge_list_data.len() as u32;
634+
edge_list_indices.push((start, end));
634635
}
635636
}
636637
}
@@ -1125,6 +1126,11 @@ impl From<CompressedHybridIndex> for HybridIndex {
11251126
}
11261127
}
11271128

1129+
// Index type for `DepNodeData`'s edges.
1130+
rustc_index::newtype_index! {
1131+
struct EdgeIndex { .. }
1132+
}
1133+
11281134
/// Data for nodes in the current graph, divided into different collections
11291135
/// based on their presence in the previous graph, and if present, their color.
11301136
/// We divide nodes this way because different types of nodes are able to share
@@ -1171,6 +1177,16 @@ struct DepNodeData<K> {
11711177
/// Data for nodes in previous graph that have been marked light green.
11721178
light_green: LightGreenDepNodeData,
11731179

1180+
// Edges for all nodes other than dark-green ones. Edges for each node
1181+
// occupy a contiguous region of this collection, which a node can reference
1182+
// using two indices. Storing edges this way rather than using an `EdgesVec`
1183+
// for each node reduces memory consumption by a not insignificant amount
1184+
// when compiling large crates. The downside is that we have to copy into
1185+
// this collection the edges from the `EdgesVec`s that are built up during
1186+
// query execution. But this is mostly balanced out by the more efficient
1187+
// implementation of `DepGraph::serialize` enabled by this representation.
1188+
unshared_edges: IndexVec<EdgeIndex, DepNodeIndex>,
1189+
11741190
/// Mapping from `DepNodeIndex` to an index into a collection above.
11751191
/// Indicates which of the above collections contains a node's data.
11761192
///
@@ -1186,7 +1202,7 @@ struct DepNodeData<K> {
11861202
/// the previous graph, so we must store all of such a node's data here.
11871203
struct NewDepNodeData<K> {
11881204
nodes: IndexVec<NewDepNodeIndex, DepNode<K>>,
1189-
edges: IndexVec<NewDepNodeIndex, EdgesVec>,
1205+
edges: IndexVec<NewDepNodeIndex, Range<EdgeIndex>>,
11901206
fingerprints: IndexVec<NewDepNodeIndex, Fingerprint>,
11911207
}
11921208

@@ -1195,7 +1211,7 @@ struct NewDepNodeData<K> {
11951211
/// fingerprint is known to be different, so we store the latter two directly.
11961212
struct RedDepNodeData {
11971213
node_indices: IndexVec<RedDepNodeIndex, SerializedDepNodeIndex>,
1198-
edges: IndexVec<RedDepNodeIndex, EdgesVec>,
1214+
edges: IndexVec<RedDepNodeIndex, Range<EdgeIndex>>,
11991215
fingerprints: IndexVec<RedDepNodeIndex, Fingerprint>,
12001216
}
12011217

@@ -1205,7 +1221,7 @@ struct RedDepNodeData {
12051221
/// graph, but the edges may be different, so we store them directly.
12061222
struct LightGreenDepNodeData {
12071223
node_indices: IndexVec<LightGreenDepNodeIndex, SerializedDepNodeIndex>,
1208-
edges: IndexVec<LightGreenDepNodeIndex, EdgesVec>,
1224+
edges: IndexVec<LightGreenDepNodeIndex, Range<EdgeIndex>>,
12091225
}
12101226

12111227
/// `CurrentDepGraph` stores the dependency graph for the current session. It
@@ -1294,11 +1310,27 @@ impl<K: DepKind> CurrentDepGraph<K> {
12941310
// not be enough. The allocation for red and green node data doesn't
12951311
// include a constant, as we don't want to allocate anything for these
12961312
// structures during full incremental builds, where they aren't used.
1313+
//
1314+
// These estimates are based on the distribution of node and edge counts
1315+
// seen in rustc-perf benchmarks, adjusted somewhat to account for the
1316+
// fact that these benchmarks aren't perfectly representative.
1317+
//
1318+
// FIXME Use a collection type that neither copies node and edge data nor
1319+
// grows multiplicatively on reallocation. Without such a collection or
1320+
// solution having the same effect, there is a performance hazard here
1321+
// in both time and space, as growing these collections means copying a
1322+
// large amount of data and doubling already large buffer capacities. A
1323+
// solution for this will also mean that it's less important to get
1324+
// these estimates right.
12971325
let new_node_count_estimate = (prev_graph_node_count * 2) / 100 + 200;
12981326
let red_node_count_estimate = (prev_graph_node_count * 3) / 100;
12991327
let light_green_node_count_estimate = (prev_graph_node_count * 25) / 100;
13001328
let total_node_count_estimate = prev_graph_node_count + new_node_count_estimate;
13011329

1330+
let average_edges_per_node_estimate = 6;
1331+
let unshared_edge_count_estimate = average_edges_per_node_estimate
1332+
* (new_node_count_estimate + red_node_count_estimate + light_green_node_count_estimate);
1333+
13021334
// We store a large collection of these in `prev_index_to_index` during
13031335
// non-full incremental builds, and want to ensure that the element size
13041336
// doesn't inadvertently increase.
@@ -1320,6 +1352,7 @@ impl<K: DepKind> CurrentDepGraph<K> {
13201352
node_indices: IndexVec::with_capacity(light_green_node_count_estimate),
13211353
edges: IndexVec::with_capacity(light_green_node_count_estimate),
13221354
},
1355+
unshared_edges: IndexVec::with_capacity(unshared_edge_count_estimate),
13231356
hybrid_indices: IndexVec::with_capacity(total_node_count_estimate),
13241357
}),
13251358
new_node_to_index: Sharded::new(|| {
@@ -1352,9 +1385,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
13521385
match self.new_node_to_index.get_shard_by_value(&dep_node).lock().entry(dep_node) {
13531386
Entry::Occupied(entry) => *entry.get(),
13541387
Entry::Vacant(entry) => {
1355-
let mut data = self.data.lock();
1388+
let data = &mut *self.data.lock();
13561389
let new_index = data.new.nodes.push(dep_node);
1357-
data.new.edges.push(edges);
1390+
add_edges(&mut data.unshared_edges, &mut data.new.edges, edges);
13581391
data.new.fingerprints.push(fingerprint);
13591392
let dep_node_index = data.hybrid_indices.push(new_index.into());
13601393
entry.insert(dep_node_index);
@@ -1377,9 +1410,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
13771410
match prev_index_to_index[prev_index] {
13781411
Some(dep_node_index) => dep_node_index,
13791412
None => {
1380-
let mut data = self.data.lock();
1413+
let data = &mut *self.data.lock();
13811414
let red_index = data.red.node_indices.push(prev_index);
1382-
data.red.edges.push(edges);
1415+
add_edges(&mut data.unshared_edges, &mut data.red.edges, edges);
13831416
data.red.fingerprints.push(fingerprint);
13841417
let dep_node_index = data.hybrid_indices.push(red_index.into());
13851418
prev_index_to_index[prev_index] = Some(dep_node_index);
@@ -1401,9 +1434,9 @@ impl<K: DepKind> CurrentDepGraph<K> {
14011434
match prev_index_to_index[prev_index] {
14021435
Some(dep_node_index) => dep_node_index,
14031436
None => {
1404-
let mut data = self.data.lock();
1437+
let data = &mut *self.data.lock();
14051438
let light_green_index = data.light_green.node_indices.push(prev_index);
1406-
data.light_green.edges.push(edges);
1439+
add_edges(&mut data.unshared_edges, &mut data.light_green.edges, edges);
14071440
let dep_node_index = data.hybrid_indices.push(light_green_index.into());
14081441
prev_index_to_index[prev_index] = Some(dep_node_index);
14091442
dep_node_index
@@ -1445,6 +1478,18 @@ impl<K: DepKind> CurrentDepGraph<K> {
14451478
}
14461479
}
14471480

1481+
#[inline]
1482+
fn add_edges<I: Idx>(
1483+
edges: &mut IndexVec<EdgeIndex, DepNodeIndex>,
1484+
edge_indices: &mut IndexVec<I, Range<EdgeIndex>>,
1485+
new_edges: EdgesVec,
1486+
) {
1487+
let start = edges.next_index();
1488+
edges.extend(new_edges);
1489+
let end = edges.next_index();
1490+
edge_indices.push(start..end);
1491+
}
1492+
14481493
/// The capacity of the `reads` field `SmallVec`
14491494
const TASK_DEPS_READS_CAP: usize = 8;
14501495
type EdgesVec = SmallVec<[DepNodeIndex; TASK_DEPS_READS_CAP]>;

compiler/rustc_query_system/src/dep_graph/query.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,23 @@ pub struct DepGraphQuery<K> {
99
}
1010

1111
impl<K: DepKind> DepGraphQuery<K> {
12-
pub fn new(nodes: &[DepNode<K>], edges: &[(usize, usize)]) -> DepGraphQuery<K> {
13-
let mut graph = Graph::with_capacity(nodes.len(), edges.len());
12+
pub fn new(
13+
nodes: &[DepNode<K>],
14+
edge_list_indices: &[(usize, usize)],
15+
edge_list_data: &[usize],
16+
) -> DepGraphQuery<K> {
17+
let mut graph = Graph::with_capacity(nodes.len(), edge_list_data.len());
1418
let mut indices = FxHashMap::default();
1519
for node in nodes {
1620
indices.insert(*node, graph.add_node(*node));
1721
}
1822

19-
for &(source, target) in edges {
20-
let source = indices[&nodes[source]];
21-
let target = indices[&nodes[target]];
22-
graph.add_edge(source, target, ());
23+
for (source, &(start, end)) in edge_list_indices.iter().enumerate() {
24+
for &target in &edge_list_data[start..end] {
25+
let source = indices[&nodes[source]];
26+
let target = indices[&nodes[target]];
27+
graph.add_edge(source, target, ());
28+
}
2329
}
2430

2531
DepGraphQuery { graph, indices }

0 commit comments

Comments
 (0)