Skip to content

Commit 27732c2

Browse files
committed
Mmap the DepGraph instead of reading it.
1 parent 36f4f4a commit 27732c2

File tree

5 files changed

+197
-107
lines changed

5 files changed

+197
-107
lines changed

compiler/rustc_incremental/src/persist/load.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ pub fn load_dep_graph(sess: &Session) -> DepGraphFuture {
204204
return LoadResult::DataOutOfDate;
205205
}
206206

207-
let dep_graph = SerializedDepGraph::decode(&mut decoder);
207+
let dep_graph = SerializedDepGraph::decode(bytes);
208208

209209
LoadResult::Ok { data: (dep_graph, prev_work_products) }
210210
}

compiler/rustc_query_system/src/dep_graph/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub use serialized::{SerializedDepGraph, SerializedDepNodeIndex};
1313

1414
use crate::ich::StableHashingContext;
1515
use rustc_data_structures::profiling::SelfProfilerRef;
16-
use rustc_serialize::{opaque::FileEncoder, Encodable};
16+
use rustc_serialize::MmapSafe;
1717
use rustc_session::Session;
1818

1919
use std::fmt;
@@ -84,7 +84,7 @@ impl FingerprintStyle {
8484
}
8585

8686
/// Describe the different families of dependency nodes.
87-
pub trait DepKind: Copy + fmt::Debug + Eq + Hash + Send + Encodable<FileEncoder> + 'static {
87+
pub trait DepKind: Copy + fmt::Debug + Eq + Hash + Send + MmapSafe + 'static {
8888
/// DepKind to use when incr. comp. is turned off.
8989
const NULL: Self;
9090

compiler/rustc_query_system/src/dep_graph/serialized.rs

Lines changed: 131 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,13 @@ use super::query::DepGraphQuery;
1616
use super::{DepKind, DepNode, DepNodeIndex};
1717
use rustc_data_structures::fingerprint::Fingerprint;
1818
use rustc_data_structures::fx::FxHashMap;
19+
use rustc_data_structures::memmap::Mmap;
20+
use rustc_data_structures::owning_ref::OwningRef;
1921
use rustc_data_structures::profiling::SelfProfilerRef;
2022
use rustc_data_structures::sync::Lock;
21-
use rustc_index::vec::{Idx, IndexVec};
23+
use rustc_index::vec::IndexVec;
2224
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
23-
use rustc_serialize::{Decodable, Decoder, Encodable};
25+
use rustc_serialize::{Decodable, Encodable};
2426
use smallvec::SmallVec;
2527
use std::convert::TryInto;
2628

@@ -34,138 +36,136 @@ rustc_index::newtype_index! {
3436
}
3537

3638
/// Data for use when recompiling the **current crate**.
37-
#[derive(Debug)]
39+
///
40+
/// The DepGraph is backed on-disk and read on-demand through a Mmap.
41+
/// The file layout is as follows.
42+
///
43+
/// The DepGraph starts with the version header, handled by rustc_incremental.
44+
/// It is followed by the concatenation of all node dependency information as:
45+
/// - the query result fingerprint (size_of<Fingerprint> bytes)
46+
/// - the number of dependency edges (4 bytes)
47+
/// - the dependencies indices (array of 4-byte integers)
48+
///
49+
/// Finding this information inside the file is handled by a "glossary" written at the end.
50+
/// This glossary is an array of `(DepNode, u32)` pairs. This array is used to make the
51+
/// correspondence between the `SerializedDepNodeIndex` (i.e. the index into this array),
52+
/// and the `DepNode`. The `u32` is the position of the dependency information (Fingerprint +
53+
/// array of dependencies) inside the file. The glossary array is directly mmapped into `nodes`.
54+
///
55+
/// The file finished with two `u64`, which are the number of entries in the glossary
56+
/// and its position in the file.
57+
///
58+
/// Graphically, we have:
59+
/// beginning of nodes beginning of glossary ---------------+
60+
/// v v |
61+
/// --------------------------------------------------------------------------------------------
62+
/// | HEADER | ... | Fingerprint | Length | Deps | ... | ... | DepNode | u32 | ... | u64 | u64 |
63+
/// --------------------------------------------------------------------------------------------
64+
/// ^ | node
65+
/// start for node i --------------------------------------+ count
66+
///
67+
/// In order to recover an index from a DepNode, we populate a hash-map in `index`.
3868
pub struct SerializedDepGraph<K: DepKind> {
39-
/// The set of all DepNodes in the graph
40-
nodes: IndexVec<SerializedDepNodeIndex, DepNode<K>>,
41-
/// The set of all Fingerprints in the graph. Each Fingerprint corresponds to
42-
/// the DepNode at the same index in the nodes vector.
43-
fingerprints: IndexVec<SerializedDepNodeIndex, Fingerprint>,
44-
/// For each DepNode, stores the list of edges originating from that
45-
/// DepNode. Encoded as a [start, end) pair indexing into edge_list_data,
46-
/// which holds the actual DepNodeIndices of the target nodes.
47-
edge_list_indices: IndexVec<SerializedDepNodeIndex, (u32, u32)>,
48-
/// A flattened list of all edge targets in the graph. Edge sources are
49-
/// implicit in edge_list_indices.
50-
edge_list_data: Vec<SerializedDepNodeIndex>,
69+
/// The set of all DepNodes in the graph and their position in the mmap.
70+
nodes: Option<OwningRef<Mmap, [(DepNode<K>, u32)]>>,
5171
/// Reciprocal map to `nodes`.
5272
index: FxHashMap<DepNode<K>, SerializedDepNodeIndex>,
5373
}
5474

5575
impl<K: DepKind> Default for SerializedDepGraph<K> {
5676
fn default() -> Self {
57-
SerializedDepGraph {
58-
nodes: Default::default(),
59-
fingerprints: Default::default(),
60-
edge_list_indices: Default::default(),
61-
edge_list_data: Default::default(),
62-
index: Default::default(),
63-
}
77+
SerializedDepGraph { nodes: None, index: Default::default() }
6478
}
6579
}
6680

6781
impl<K: DepKind> SerializedDepGraph<K> {
6882
#[inline]
69-
pub fn edge_targets_from(&self, source: SerializedDepNodeIndex) -> &[SerializedDepNodeIndex] {
70-
let targets = self.edge_list_indices[source];
71-
&self.edge_list_data[targets.0 as usize..targets.1 as usize]
83+
fn decoder_at(&self, dep_node_index: SerializedDepNodeIndex) -> Option<MemDecoder<'_>> {
84+
let nodes = self.nodes.as_ref()?;
85+
let dep_node_index = dep_node_index.as_usize();
86+
let position = nodes[dep_node_index].1 as usize;
87+
let data = &nodes.owner()[position..];
88+
let decoder = MemDecoder::new(data, 0);
89+
Some(decoder)
90+
}
91+
92+
#[inline]
93+
pub fn node_to_index_opt(&self, dep_node: &DepNode<K>) -> Option<SerializedDepNodeIndex> {
94+
self.index.get(dep_node).cloned()
7295
}
7396

7497
#[inline]
7598
pub fn index_to_node(&self, dep_node_index: SerializedDepNodeIndex) -> DepNode<K> {
76-
self.nodes[dep_node_index]
99+
let dep_node_index = dep_node_index.as_usize();
100+
self.nodes.as_ref().unwrap()[dep_node_index].0
77101
}
78102

79103
#[inline]
80-
pub fn node_to_index_opt(&self, dep_node: &DepNode<K>) -> Option<SerializedDepNodeIndex> {
81-
self.index.get(dep_node).cloned()
104+
pub fn fingerprint_by_index(&self, dep_node_index: SerializedDepNodeIndex) -> Fingerprint {
105+
if let Some(decoder) = self.decoder_at(dep_node_index) {
106+
let &fingerprint = unsafe { decoder.mmap_at::<Fingerprint>(0) };
107+
fingerprint
108+
} else {
109+
Fingerprint::ZERO
110+
}
82111
}
83112

84113
#[inline]
85114
pub fn fingerprint_of(&self, dep_node: &DepNode<K>) -> Option<Fingerprint> {
86-
self.index.get(dep_node).map(|&node_index| self.fingerprints[node_index])
115+
let index = self.index.get(dep_node).cloned()?;
116+
Some(self.fingerprint_by_index(index))
87117
}
88118

89119
#[inline]
90-
pub fn fingerprint_by_index(&self, dep_node_index: SerializedDepNodeIndex) -> Fingerprint {
91-
self.fingerprints[dep_node_index]
120+
pub fn edge_targets_from(&self, source: SerializedDepNodeIndex) -> &[SerializedDepNodeIndex] {
121+
// The encoder has checked that there is no padding there.
122+
if let Some(decoder) = self.decoder_at(source) {
123+
let position = std::mem::size_of::<Fingerprint>();
124+
let &length = unsafe { decoder.mmap_at::<u32>(position) };
125+
unsafe {
126+
decoder.mmap_slice_at::<SerializedDepNodeIndex>(position + 4, length as usize)
127+
}
128+
} else {
129+
&[]
130+
}
92131
}
93132

94133
pub fn node_count(&self) -> usize {
95134
self.index.len()
96135
}
97-
}
98136

99-
impl<'a, K: DepKind + Decodable<MemDecoder<'a>>> Decodable<MemDecoder<'a>>
100-
for SerializedDepGraph<K>
101-
{
102-
#[instrument(level = "debug", skip(d))]
103-
fn decode(d: &mut MemDecoder<'a>) -> SerializedDepGraph<K> {
104-
let start_position = d.position();
137+
#[instrument(level = "debug", skip(mmap))]
138+
pub fn decode(mmap: Mmap) -> SerializedDepGraph<K> {
139+
let data = mmap.as_ref();
105140

106-
// The last 16 bytes are the node count and edge count.
107-
debug!("position: {:?}", d.position());
108-
d.set_position(d.data.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE);
141+
// The last 16 bytes are the node count and the nodes (glossary) position.
142+
let start_position = data.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE;
143+
let mut d = MemDecoder::new(data, start_position);
109144
debug!("position: {:?}", d.position());
110145

111-
let node_count = IntEncodedWithFixedSize::decode(d).0 as usize;
112-
let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize;
113-
debug!(?node_count, ?edge_count);
146+
let node_count = IntEncodedWithFixedSize::decode(&mut d).0 as usize;
147+
let nodes_position = IntEncodedWithFixedSize::decode(&mut d).0 as usize;
148+
debug!(?node_count, ?nodes_position);
114149

115-
debug!("position: {:?}", d.position());
116-
d.set_position(start_position);
117-
debug!("position: {:?}", d.position());
118-
119-
let mut nodes = IndexVec::with_capacity(node_count);
120-
let mut fingerprints = IndexVec::with_capacity(node_count);
121-
let mut edge_list_indices = IndexVec::with_capacity(node_count);
122-
let mut edge_list_data = Vec::with_capacity(edge_count);
123-
124-
for _index in 0..node_count {
125-
let dep_node: DepNode<K> = Decodable::decode(d);
126-
let _i: SerializedDepNodeIndex = nodes.push(dep_node);
127-
debug_assert_eq!(_i.index(), _index);
128-
129-
let fingerprint: Fingerprint = Decodable::decode(d);
130-
let _i: SerializedDepNodeIndex = fingerprints.push(fingerprint);
131-
debug_assert_eq!(_i.index(), _index);
132-
133-
// Deserialize edges -- sequence of DepNodeIndex
134-
let len = d.read_usize();
135-
let start = edge_list_data.len().try_into().unwrap();
136-
for _ in 0..len {
137-
let edge = Decodable::decode(d);
138-
edge_list_data.push(edge);
139-
}
140-
let end = edge_list_data.len().try_into().unwrap();
141-
let _i: SerializedDepNodeIndex = edge_list_indices.push((start, end));
142-
debug_assert_eq!(_i.index(), _index);
143-
}
150+
let nodes = OwningRef::new(mmap).map(|mmap| {
151+
let d = MemDecoder::new(mmap, nodes_position);
152+
unsafe { d.mmap_slice_at::<(DepNode<K>, u32)>(nodes_position, node_count) }
153+
});
144154

145-
let index: FxHashMap<_, _> =
146-
nodes.iter_enumerated().map(|(idx, &dep_node)| (dep_node, idx)).collect();
155+
let index: FxHashMap<_, _> = nodes
156+
.iter()
157+
.enumerate()
158+
.map(|(idx, &(dep_node, _))| (dep_node, SerializedDepNodeIndex::from_usize(idx)))
159+
.collect();
147160

148-
SerializedDepGraph { nodes, fingerprints, edge_list_indices, edge_list_data, index }
161+
SerializedDepGraph { nodes: Some(nodes), index }
149162
}
150163
}
151164

152-
#[derive(Debug, Encodable, Decodable)]
153-
pub struct NodeInfo<K: DepKind> {
154-
node: DepNode<K>,
155-
fingerprint: Fingerprint,
156-
edges: SmallVec<[DepNodeIndex; 8]>,
157-
}
158-
159-
struct Stat<K: DepKind> {
160-
kind: K,
161-
node_counter: u64,
162-
edge_counter: u64,
163-
}
164-
165165
struct EncoderState<K: DepKind> {
166166
encoder: FileEncoder,
167-
total_node_count: usize,
168167
total_edge_count: usize,
168+
nodes: IndexVec<DepNodeIndex, (DepNode<K>, u32)>,
169169
stats: Option<FxHashMap<K, Stat<K>>>,
170170
}
171171

@@ -174,22 +174,35 @@ impl<K: DepKind> EncoderState<K> {
174174
Self {
175175
encoder,
176176
total_edge_count: 0,
177-
total_node_count: 0,
177+
nodes: IndexVec::default(),
178178
stats: record_stats.then(FxHashMap::default),
179179
}
180180
}
181181

182+
fn try_encode_node(&mut self, node: &NodeInfo<K>) -> usize {
183+
let encoder = &mut self.encoder;
184+
let start_pos = encoder.write_mmap(&node.fingerprint);
185+
let _pos = encoder.write_mmap::<u32>(&node.edges.len().try_into().unwrap());
186+
debug_assert_eq!(_pos, start_pos + std::mem::size_of::<Fingerprint>());
187+
let _pos = encoder.write_mmap_slice::<DepNodeIndex>(&node.edges[..]);
188+
debug_assert_eq!(_pos, start_pos + std::mem::size_of::<Fingerprint>() + 4);
189+
start_pos
190+
}
191+
182192
fn encode_node(
183193
&mut self,
184194
node: &NodeInfo<K>,
185195
record_graph: &Option<Lock<DepGraphQuery<K>>>,
186196
) -> DepNodeIndex {
187-
let index = DepNodeIndex::new(self.total_node_count);
188-
self.total_node_count += 1;
189-
190197
let edge_count = node.edges.len();
191198
self.total_edge_count += edge_count;
192199

200+
let position = self.try_encode_node(node);
201+
debug_assert!(position & (std::mem::align_of::<Fingerprint>() - 1) == 0);
202+
debug!(?position);
203+
204+
let index = self.nodes.push((node.node, position.try_into().unwrap()));
205+
193206
if let Some(record_graph) = &record_graph {
194207
// Do not ICE when a query is called from within `with_query`.
195208
if let Some(record_graph) = &mut record_graph.try_lock() {
@@ -205,21 +218,20 @@ impl<K: DepKind> EncoderState<K> {
205218
stat.edge_counter += edge_count as u64;
206219
}
207220

208-
let encoder = &mut self.encoder;
209-
node.encode(encoder);
210221
index
211222
}
212223

213224
fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult {
214-
let Self { mut encoder, total_node_count, total_edge_count, stats: _ } = self;
225+
let Self { mut encoder, nodes, total_edge_count: _, stats: _ } = self;
215226

216-
let node_count = total_node_count.try_into().unwrap();
217-
let edge_count = total_edge_count.try_into().unwrap();
227+
let node_count = nodes.len().try_into().unwrap();
228+
let nodes_position = encoder.write_mmap_slice(&nodes.raw[..]);
229+
let nodes_position = nodes_position.try_into().unwrap();
218230

219-
debug!(?node_count, ?edge_count);
231+
debug!(?node_count, ?nodes_position);
220232
debug!("position: {:?}", encoder.position());
221233
IntEncodedWithFixedSize(node_count).encode(&mut encoder);
222-
IntEncodedWithFixedSize(edge_count).encode(&mut encoder);
234+
IntEncodedWithFixedSize(nodes_position).encode(&mut encoder);
223235
debug!("position: {:?}", encoder.position());
224236
// Drop the encoder so that nothing is written after the counts.
225237
let result = encoder.finish();
@@ -232,12 +244,25 @@ impl<K: DepKind> EncoderState<K> {
232244
}
233245
}
234246

247+
#[derive(Debug, Encodable, Decodable)]
248+
pub struct NodeInfo<K: DepKind> {
249+
node: DepNode<K>,
250+
fingerprint: Fingerprint,
251+
edges: SmallVec<[DepNodeIndex; 8]>,
252+
}
253+
254+
struct Stat<K: DepKind> {
255+
kind: K,
256+
node_counter: u64,
257+
edge_counter: u64,
258+
}
259+
235260
pub struct GraphEncoder<K: DepKind> {
236261
status: Lock<EncoderState<K>>,
237262
record_graph: Option<Lock<DepGraphQuery<K>>>,
238263
}
239264

240-
impl<K: DepKind + Encodable<FileEncoder>> GraphEncoder<K> {
265+
impl<K: DepKind> GraphEncoder<K> {
241266
pub fn new(
242267
encoder: FileEncoder,
243268
prev_node_count: usize,
@@ -270,11 +295,13 @@ impl<K: DepKind + Encodable<FileEncoder>> GraphEncoder<K> {
270295
----------------------------------------------\
271296
------------";
272297

298+
let total_node_count = status.nodes.len();
299+
273300
eprintln!("[incremental]");
274301
eprintln!("[incremental] DepGraph Statistics");
275302
eprintln!("{}", SEPARATOR);
276303
eprintln!("[incremental]");
277-
eprintln!("[incremental] Total Node Count: {}", status.total_node_count);
304+
eprintln!("[incremental] Total Node Count: {}", total_node_count);
278305
eprintln!("[incremental] Total Edge Count: {}", status.total_edge_count);
279306

280307
if cfg!(debug_assertions) {
@@ -294,7 +321,7 @@ impl<K: DepKind + Encodable<FileEncoder>> GraphEncoder<K> {
294321

295322
for stat in stats {
296323
let node_kind_ratio =
297-
(100.0 * (stat.node_counter as f64)) / (status.total_node_count as f64);
324+
(100.0 * (stat.node_counter as f64)) / (total_node_count as f64);
298325
let node_kind_avg_edges = (stat.edge_counter as f64) / (stat.node_counter as f64);
299326

300327
eprintln!(

0 commit comments

Comments
 (0)