Skip to content

Commit 3a2edd7

Browse files
committed
load/save hashes of metadata
This commit reorganizes how the persist code treats hashing. The idea is that each crate saves a file containing hashes representing the metadata for each item X. When we see a read from `MetaData(X)`, we can load this hash up (if we don't find a file for that crate, we just use the SVH for the entire crate). To compute the hash for `MetaData(Y)`, where Y is some local item, we examine all the predecessors of the `MetaData(Y)` node and hash their hashes together.
1 parent b01919a commit 3a2edd7

File tree

7 files changed

+249
-92
lines changed

7 files changed

+249
-92
lines changed

src/librustc_incremental/persist/data.rs

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//! The data that we will serialize and deserialize.
1212
1313
use rustc::dep_graph::DepNode;
14+
use rustc::hir::def_id::DefIndex;
1415

1516
use super::directory::DefPathIndex;
1617

@@ -34,30 +35,56 @@ pub struct SerializedDepGraph {
3435
/// compare them against the hashes we see at that time, which
3536
/// will tell us what has changed, either in this crate or in some
3637
/// crate that we depend on.
38+
///
39+
/// Because they will be reloaded, we don't store the DefId (which
40+
/// will be different when we next compile) related to each node,
41+
/// but rather the `DefPathIndex`. This can then be retraced
42+
/// to find the current def-id.
3743
pub hashes: Vec<SerializedHash>,
3844
}
3945

46+
pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>);
47+
48+
#[derive(Debug, RustcEncodable, RustcDecodable)]
49+
pub struct SerializedHash {
50+
/// node being hashed; either a Hir or MetaData variant, in
51+
/// practice
52+
pub node: DepNode<DefPathIndex>,
53+
54+
/// the hash itself, computed by `calculate_item_hash`
55+
pub hash: u64,
56+
}
57+
4058
/// Data for use when downstream crates get recompiled.
4159
#[derive(Debug, RustcEncodable, RustcDecodable)]
4260
pub struct SerializedMetadataHashes {
4361
/// For each def-id defined in this crate that appears in the
4462
/// metadata, we hash all the inputs that were used when producing
45-
/// the metadata. We save this after compilation is done. Then,
63+
/// the metadata. We save this after compilation is done. Then,
4664
/// when some downstream crate is being recompiled, it can compare
4765
/// the hashes we saved against the hashes that it saw from
4866
/// before; this will tell it which of the items in this crate
4967
/// changed, which in turn implies what items in the downstream
5068
/// crate need to be recompiled.
51-
pub hashes: Vec<SerializedHash>,
69+
///
70+
/// Note that we store the def-ids here. This is because we don't
71+
/// reload this file when we recompile this crate; we will just
72+
/// regenerate it completely with the current hashes and new def-ids.
73+
///
74+
/// Then downstream crates will load up their
75+
/// `SerializedDepGraph`, which may contain `MetaData(X)` nodes
76+
/// where `X` refers to some item in this crate. That `X` will be
77+
/// a `DefPathIndex` that gets retraced to the current `DefId`
78+
/// (matching the one found in this structure).
79+
pub hashes: Vec<SerializedMetadataHash>,
5280
}
5381

54-
pub type SerializedEdge = (DepNode<DefPathIndex>, DepNode<DefPathIndex>);
55-
82+
/// The hash for some metadata that (when saving) will be exported
83+
/// from this crate, or which (when importing) was exported by an
84+
/// upstream crate.
5685
#[derive(Debug, RustcEncodable, RustcDecodable)]
57-
pub struct SerializedHash {
58-
/// node being hashed; either a Hir or MetaData variant, in
59-
/// practice
60-
pub node: DepNode<DefPathIndex>,
86+
pub struct SerializedMetadataHash {
87+
pub def_index: DefIndex,
6188

6289
/// the hash itself, computed by `calculate_item_hash`
6390
pub hash: u64,

src/librustc_incremental/persist/directory.rs

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ impl RetracedDefIdDirectory {
6464

6565
pub struct DefIdDirectoryBuilder<'a,'tcx:'a> {
6666
tcx: TyCtxt<'a, 'tcx, 'tcx>,
67-
hash: DefIdMap<Option<DefPathIndex>>,
67+
hash: DefIdMap<DefPathIndex>,
6868
directory: DefIdDirectory,
6969
}
7070

@@ -77,29 +77,22 @@ impl<'a,'tcx> DefIdDirectoryBuilder<'a,'tcx> {
7777
}
7878
}
7979

80-
pub fn add(&mut self, def_id: DefId) -> Option<DefPathIndex> {
81-
if !def_id.is_local() {
82-
// FIXME(#32015) clarify story about cross-crate dep tracking
83-
return None;
84-
}
85-
80+
pub fn add(&mut self, def_id: DefId) -> DefPathIndex {
81+
debug!("DefIdDirectoryBuilder: def_id={:?}", def_id);
8682
let tcx = self.tcx;
8783
let paths = &mut self.directory.paths;
8884
self.hash.entry(def_id)
8985
.or_insert_with(|| {
9086
let def_path = tcx.def_path(def_id);
91-
if !def_path.is_local() {
92-
return None;
93-
}
9487
let index = paths.len() as u32;
9588
paths.push(def_path);
96-
Some(DefPathIndex { index: index })
89+
DefPathIndex { index: index }
9790
})
9891
.clone()
9992
}
10093

101-
pub fn map(&mut self, node: DepNode<DefId>) -> Option<DepNode<DefPathIndex>> {
102-
node.map_def(|&def_id| self.add(def_id))
94+
pub fn map(&mut self, node: DepNode<DefId>) -> DepNode<DefPathIndex> {
95+
node.map_def(|&def_id| Some(self.add(def_id))).unwrap()
10396
}
10497

10598
pub fn into_directory(self) -> DefIdDirectory {
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use calculate_svh::SvhCalculate;
12+
use rbml::Error;
13+
use rbml::opaque::Decoder;
14+
use rustc::dep_graph::DepNode;
15+
use rustc::hir::def_id::DefId;
16+
use rustc::hir::svh::Svh;
17+
use rustc::ty::TyCtxt;
18+
use rustc_data_structures::fnv::FnvHashMap;
19+
use rustc_serialize::Decodable;
20+
use std::io::{ErrorKind, Read};
21+
use std::fs::File;
22+
use syntax::ast;
23+
24+
use super::data::*;
25+
use super::util::*;
26+
27+
pub struct HashContext<'a, 'tcx: 'a> {
28+
pub tcx: TyCtxt<'a, 'tcx, 'tcx>,
29+
item_metadata_hashes: FnvHashMap<DefId, u64>,
30+
crate_hashes: FnvHashMap<ast::CrateNum, Svh>,
31+
}
32+
33+
impl<'a, 'tcx> HashContext<'a, 'tcx> {
34+
pub fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Self {
35+
HashContext {
36+
tcx: tcx,
37+
item_metadata_hashes: FnvHashMap(),
38+
crate_hashes: FnvHashMap(),
39+
}
40+
}
41+
42+
pub fn hash(&mut self, dep_node: DepNode<DefId>) -> Option<u64> {
43+
match dep_node {
44+
// HIR nodes (which always come from our crate) are an input:
45+
DepNode::Hir(def_id) => {
46+
assert!(def_id.is_local());
47+
Some(self.hir_hash(def_id))
48+
}
49+
50+
// MetaData from other crates is an *input* to us.
51+
// MetaData nodes from *our* crate are an *output*; we
52+
// don't hash them, but we do compute a hash for them and
53+
// save it for others to use.
54+
DepNode::MetaData(def_id) if !def_id.is_local() => {
55+
Some(self.metadata_hash(def_id))
56+
}
57+
58+
_ => {
59+
// Other kinds of nodes represent computed by-products
60+
// that we don't hash directly; instead, they should
61+
// have some transitive dependency on a Hir or
62+
// MetaData node, so we'll just hash that
63+
None
64+
}
65+
}
66+
}
67+
68+
fn hir_hash(&mut self, def_id: DefId) -> u64 {
69+
assert!(def_id.is_local());
70+
// FIXME(#32753) -- should we use a distinct hash here
71+
self.tcx.calculate_item_hash(def_id)
72+
}
73+
74+
fn metadata_hash(&mut self, def_id: DefId) -> u64 {
75+
debug!("metadata_hash(def_id={:?})", def_id);
76+
77+
assert!(!def_id.is_local());
78+
loop {
79+
// check whether we have a result cached for this def-id
80+
if let Some(&hash) = self.item_metadata_hashes.get(&def_id) {
81+
debug!("metadata_hash: def_id={:?} hash={:?}", def_id, hash);
82+
return hash;
83+
}
84+
85+
// check whether we did not find detailed metadata for this
86+
// krate; in that case, we just use the krate's overall hash
87+
if let Some(&hash) = self.crate_hashes.get(&def_id.krate) {
88+
debug!("metadata_hash: def_id={:?} crate_hash={:?}", def_id, hash);
89+
return hash.as_u64();
90+
}
91+
92+
// otherwise, load the data and repeat.
93+
self.load_data(def_id.krate);
94+
assert!(self.crate_hashes.contains_key(&def_id.krate));
95+
}
96+
}
97+
98+
fn load_data(&mut self, cnum: ast::CrateNum) {
99+
debug!("load_data(cnum={})", cnum);
100+
101+
let svh = self.tcx.sess.cstore.crate_hash(cnum);
102+
let old = self.crate_hashes.insert(cnum, svh);
103+
debug!("load_data: svh={}", svh);
104+
assert!(old.is_none(), "loaded data for crate {:?} twice", cnum);
105+
106+
if let Some(path) = metadata_hash_path(self.tcx, cnum) {
107+
debug!("load_data: path={:?}", path);
108+
let mut data = vec![];
109+
match
110+
File::open(&path)
111+
.and_then(|mut file| file.read_to_end(&mut data))
112+
{
113+
Ok(_) => {
114+
match self.load_from_data(cnum, &data) {
115+
Ok(()) => { }
116+
Err(err) => {
117+
bug!("decoding error in dep-graph from `{}`: {}",
118+
path.display(), err);
119+
}
120+
}
121+
}
122+
Err(err) => {
123+
match err.kind() {
124+
ErrorKind::NotFound => {
125+
// If the file is not found, that's ok.
126+
}
127+
_ => {
128+
self.tcx.sess.err(
129+
&format!("could not load dep information from `{}`: {}",
130+
path.display(), err));
131+
return;
132+
}
133+
}
134+
}
135+
}
136+
}
137+
}
138+
139+
fn load_from_data(&mut self, cnum: ast::CrateNum, data: &[u8]) -> Result<(), Error> {
140+
debug!("load_from_data(cnum={})", cnum);
141+
142+
// Load up the hashes for the def-ids from this crate.
143+
let mut decoder = Decoder::new(data, 0);
144+
let serialized_hashes = try!(SerializedMetadataHashes::decode(&mut decoder));
145+
for serialized_hash in serialized_hashes.hashes {
146+
// the hashes are stored with just a def-index, which is
147+
// always relative to the old crate; convert that to use
148+
// our internal crate number
149+
let def_id = DefId { krate: cnum, index: serialized_hash.def_index };
150+
151+
// record the hash for this dep-node
152+
let old = self.item_metadata_hashes.insert(def_id, serialized_hash.hash);
153+
debug!("load_from_data: def_id={:?} hash={}", def_id, serialized_hash.hash);
154+
assert!(old.is_none(), "already have hash for {:?}", def_id);
155+
}
156+
Ok(())
157+
}
158+
}

src/librustc_incremental/persist/load.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::path::Path;
2424
use super::data::*;
2525
use super::directory::*;
2626
use super::dirty_clean;
27+
use super::hash::*;
2728
use super::util::*;
2829

2930
type DirtyNodes = FnvHashSet<DepNode<DefId>>;
@@ -133,13 +134,13 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
133134
hashes: &[SerializedHash],
134135
retraced: &RetracedDefIdDirectory)
135136
-> DirtyNodes {
137+
let mut hcx = HashContext::new(tcx);
136138
let mut items_removed = false;
137139
let mut dirty_nodes = FnvHashSet();
138140
for hash in hashes {
139141
match hash.node.map_def(|&i| retraced.def_id(i)) {
140142
Some(dep_node) => {
141-
// FIXME(#32753) -- should we use a distinct hash here
142-
let current_hash = dep_node.hash(tcx).unwrap();
143+
let current_hash = hcx.hash(dep_node).unwrap();
143144
debug!("initial_dirty_nodes: hash of {:?} is {:?}, was {:?}",
144145
dep_node, current_hash, hash.hash);
145146
if current_hash != hash.hash {

src/librustc_incremental/persist/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
mod data;
1616
mod directory;
1717
mod dirty_clean;
18+
mod hash;
1819
mod load;
1920
mod save;
2021
mod util;

0 commit comments

Comments
 (0)