Skip to content

Commit 80f1bcd

Browse files
committed
coverage: Build the global file table ahead of time
1 parent df1e46b commit 80f1bcd

File tree

4 files changed

+70
-32
lines changed

4 files changed

+70
-32
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3563,6 +3563,7 @@ version = "0.0.0"
35633563
dependencies = [
35643564
"bitflags 1.3.2",
35653565
"cstr",
3566+
"itertools",
35663567
"libc",
35673568
"measureme",
35683569
"object",

compiler/rustc_codegen_llvm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ test = false
99
[dependencies]
1010
bitflags = "1.0"
1111
cstr = "0.2"
12+
itertools = "0.10.5"
1213
libc = "0.2"
1314
measureme = "10.0.0"
1415
object = { version = "0.32.0", default-features = false, features = [

compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use rustc_middle::mir::coverage::{
77
CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op,
88
};
99
use rustc_middle::ty::Instance;
10+
use rustc_span::Symbol;
1011

1112
/// Holds all of the coverage mapping data associated with a function instance,
1213
/// collected during traversal of `Coverage` statements in the function's MIR.
@@ -162,7 +163,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
162163
}
163164

164165
pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
165-
FunctionCoverage::new(self)
166+
FunctionCoverage::from_collector(self)
166167
}
167168
}
168169

@@ -175,7 +176,7 @@ pub(crate) struct FunctionCoverage<'tcx> {
175176
}
176177

177178
impl<'tcx> FunctionCoverage<'tcx> {
178-
fn new(collector: FunctionCoverageCollector<'tcx>) -> Self {
179+
fn from_collector(collector: FunctionCoverageCollector<'tcx>) -> Self {
179180
let zero_expressions = collector.identify_zero_expressions();
180181
let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } =
181182
collector;
@@ -194,6 +195,11 @@ impl<'tcx> FunctionCoverage<'tcx> {
194195
if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
195196
}
196197

198+
/// Returns an iterator over all filenames used by this function's mappings.
199+
pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> {
200+
self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name)
201+
}
202+
197203
/// Convert this function's coverage expression data into a form that can be
198204
/// passed through FFI to LLVM.
199205
pub(crate) fn counter_expressions(

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

Lines changed: 60 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,17 @@ use crate::coverageinfo::ffi::CounterMappingRegion;
44
use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
55
use crate::llvm;
66

7+
use itertools::Itertools as _;
78
use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods};
8-
use rustc_data_structures::fx::FxIndexSet;
9+
use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
910
use rustc_hir::def::DefKind;
1011
use rustc_hir::def_id::DefId;
1112
use rustc_index::IndexVec;
1213
use rustc_middle::bug;
1314
use rustc_middle::mir;
1415
use rustc_middle::mir::coverage::CodeRegion;
1516
use rustc_middle::ty::{self, TyCtxt};
17+
use rustc_session::RemapFileNameExt;
1618
use rustc_span::def_id::DefIdSet;
1719
use rustc_span::Symbol;
1820

@@ -57,20 +59,26 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
5759
return;
5860
}
5961

60-
let mut global_file_table = GlobalFileTable::new(tcx);
62+
let function_coverage_entries = function_coverage_map
63+
.into_iter()
64+
.map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
65+
.collect::<Vec<_>>();
66+
67+
let all_file_names =
68+
function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names());
69+
let global_file_table = GlobalFileTable::new(tcx, all_file_names);
6170

6271
// Encode coverage mappings and generate function records
6372
let mut function_data = Vec::new();
64-
for (instance, function_coverage) in function_coverage_map {
65-
let function_coverage = function_coverage.into_finished();
73+
for (instance, function_coverage) in function_coverage_entries {
6674
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
6775

6876
let mangled_function_name = tcx.symbol_name(instance).name;
6977
let source_hash = function_coverage.source_hash();
7078
let is_used = function_coverage.is_used();
7179

7280
let coverage_mapping_buffer =
73-
encode_mappings_for_function(&mut global_file_table, &function_coverage);
81+
encode_mappings_for_function(&global_file_table, &function_coverage);
7482

7583
if coverage_mapping_buffer.is_empty() {
7684
if function_coverage.is_used() {
@@ -88,11 +96,11 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
8896
}
8997

9098
// Encode all filenames referenced by counters/expressions in this module
91-
let filenames_buffer = global_file_table.into_filenames_buffer();
99+
let filenames_buffer = global_file_table.filenames_buffer();
92100

93101
let filenames_size = filenames_buffer.len();
94-
let filenames_val = cx.const_bytes(&filenames_buffer);
95-
let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer);
102+
let filenames_val = cx.const_bytes(filenames_buffer);
103+
let filenames_ref = coverageinfo::hash_bytes(filenames_buffer);
96104

97105
// Generate the LLVM IR representation of the coverage map and store it in a well-known global
98106
let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val);
@@ -140,39 +148,61 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
140148
}
141149

142150
struct GlobalFileTable {
143-
global_file_table: FxIndexSet<Symbol>,
151+
/// This "raw" table doesn't include the working dir, so a filename's
152+
/// global ID is its index in this set **plus one**.
153+
raw_file_table: FxIndexSet<Symbol>,
154+
filenames_buffer: Vec<u8>,
144155
}
145156

146157
impl GlobalFileTable {
147-
fn new(tcx: TyCtxt<'_>) -> Self {
148-
let mut global_file_table = FxIndexSet::default();
158+
fn new(tcx: TyCtxt<'_>, all_file_names: impl IntoIterator<Item = Symbol>) -> Self {
149159
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
150160
// requires setting the first filename to the compilation directory.
151161
// Since rustc generates coverage maps with relative paths, the
152162
// compilation directory can be combined with the relative paths
153163
// to get absolute paths, if needed.
154-
use rustc_session::RemapFileNameExt;
155-
let working_dir =
156-
Symbol::intern(&tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy());
157-
global_file_table.insert(working_dir);
158-
Self { global_file_table }
159-
}
160-
161-
fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 {
162-
let (global_file_id, _) = self.global_file_table.insert_full(file_name);
163-
global_file_id as u32
164-
}
164+
let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy();
165+
166+
// Prepare a map from filename symbols to their underlying strings, so
167+
// that we can sort by the strings.
168+
let mut raw_file_table = FxIndexMap::<Symbol, &str>::default();
169+
// Filenames usually come in contiguous runs, so dedup to save work.
170+
let all_file_names = all_file_names.into_iter().dedup().collect::<Vec<_>>();
171+
for file_name in &all_file_names {
172+
raw_file_table.entry(*file_name).or_insert_with(|| file_name.as_str());
173+
}
165174

166-
fn into_filenames_buffer(self) -> Vec<u8> {
167-
// This method takes `self` so that the caller can't accidentally
168-
// modify the original file table after encoding it into a buffer.
175+
// Sort the file table by its actual string values, not the arbitrary
176+
// ordering of its symbols.
177+
raw_file_table.sort_unstable_by(|_, a, _, b| str::cmp(a, b));
169178

170-
llvm::build_byte_buffer(|buffer| {
179+
// Build the LLVM filenames buffer ahead of time, so that we can discard
180+
// the string references afterwards.
181+
let filenames_buffer = llvm::build_byte_buffer(|buffer| {
171182
coverageinfo::write_filenames_section_to_buffer(
172-
self.global_file_table.iter().map(Symbol::as_str),
183+
// Insert the working dir at index 0, before the other filenames.
184+
std::iter::once(working_dir).chain(raw_file_table.values().copied()),
173185
buffer,
174186
);
175-
})
187+
});
188+
189+
// Discard the string reference values, leaving only a set of symbols.
190+
let raw_file_table = raw_file_table.into_keys().collect::<FxIndexSet<_>>();
191+
192+
Self { raw_file_table, filenames_buffer }
193+
}
194+
195+
fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 {
196+
let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| {
197+
bug!("file name not found in prepared global file table: {file_name}");
198+
});
199+
// The raw file table doesn't include an entry for the working dir
200+
// (which has ID 0), so add 1 to get the correct ID.
201+
(raw_id + 1) as u32
202+
}
203+
204+
fn filenames_buffer(&self) -> &[u8] {
205+
&self.filenames_buffer
176206
}
177207
}
178208

@@ -182,7 +212,7 @@ impl GlobalFileTable {
182212
///
183213
/// Every filename used by the function's mappings must already be present in
/// the prepared global file table; filenames are only looked up here, never added.
184214
fn encode_mappings_for_function(
185-
global_file_table: &mut GlobalFileTable,
215+
global_file_table: &GlobalFileTable,
186216
function_coverage: &FunctionCoverage<'_>,
187217
) -> Vec<u8> {
188218
let mut counter_regions = function_coverage.counter_regions().collect::<Vec<_>>();
@@ -203,7 +233,7 @@ fn encode_mappings_for_function(
203233
for counter_regions_for_file in
204234
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
205235
{
206-
// Look up (or allocate) the global file ID for this filename.
236+
// Look up the global file ID for this filename.
207237
let file_name = counter_regions_for_file[0].1.file_name;
208238
let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
209239

0 commit comments

Comments
 (0)