Skip to content

Commit aa021b7

Browse files
committed
coverage: Build the global file table ahead of time
1 parent 5b101c5 commit aa021b7

File tree

4 files changed

+68
-29
lines changed

4 files changed

+68
-29
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3596,6 +3596,7 @@ version = "0.0.0"
35963596
dependencies = [
35973597
"bitflags 1.3.2",
35983598
"cstr",
3599+
"itertools",
35993600
"libc",
36003601
"measureme",
36013602
"object",

compiler/rustc_codegen_llvm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ test = false
99
[dependencies]
1010
bitflags = "1.0"
1111
cstr = "0.2"
12+
itertools = "0.10.5"
1213
libc = "0.2"
1314
measureme = "10.0.0"
1415
object = { version = "0.32.0", default-features = false, features = [

compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use rustc_middle::mir::coverage::{
77
CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op,
88
};
99
use rustc_middle::ty::Instance;
10+
use rustc_span::Symbol;
1011

1112
/// Holds all of the coverage mapping data associated with a function instance,
1213
/// collected during traversal of `Coverage` statements in the function's MIR.
@@ -194,6 +195,11 @@ impl<'tcx> FunctionCoverage<'tcx> {
194195
if self.is_used { self.function_coverage_info.function_source_hash } else { 0 }
195196
}
196197

198+
/// Returns an iterator over all filenames used by this function's mappings.
199+
pub(crate) fn all_file_names(&self) -> impl Iterator<Item = Symbol> + Captures<'_> {
200+
self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name)
201+
}
202+
197203
/// Convert this function's coverage expression data into a form that can be
198204
/// passed through FFI to LLVM.
199205
pub(crate) fn counter_expressions(

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

Lines changed: 60 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ use crate::coverageinfo::ffi::CounterMappingRegion;
44
use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector};
55
use crate::llvm;
66

7+
use itertools::Itertools as _;
78
use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods};
8-
use rustc_data_structures::fx::FxIndexSet;
9+
use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
910
use rustc_hir::def::DefKind;
1011
use rustc_hir::def_id::DefId;
1112
use rustc_index::IndexVec;
@@ -57,20 +58,26 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
5758
return;
5859
}
5960

60-
let mut global_file_table = GlobalFileTable::new(tcx);
61+
let function_coverage_entries = function_coverage_map
62+
.into_iter()
63+
.map(|(instance, function_coverage)| (instance, function_coverage.into_finished()))
64+
.collect::<Vec<_>>();
65+
66+
let all_file_names =
67+
function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names());
68+
let global_file_table = GlobalFileTable::new(tcx, all_file_names);
6169

6270
// Encode coverage mappings and generate function records
6371
let mut function_data = Vec::new();
64-
for (instance, function_coverage) in function_coverage_map {
65-
let function_coverage = function_coverage.into_finished();
72+
for (instance, function_coverage) in function_coverage_entries {
6673
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
6774

6875
let mangled_function_name = tcx.symbol_name(instance).name;
6976
let source_hash = function_coverage.source_hash();
7077
let is_used = function_coverage.is_used();
7178

7279
let coverage_mapping_buffer =
73-
encode_mappings_for_function(&mut global_file_table, &function_coverage);
80+
encode_mappings_for_function(&global_file_table, &function_coverage);
7481

7582
if coverage_mapping_buffer.is_empty() {
7683
if function_coverage.is_used() {
@@ -88,11 +95,11 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
8895
}
8996

9097
// Fetch the already-encoded buffer of all filenames referenced by counters/expressions in this module
91-
let filenames_buffer = global_file_table.into_filenames_buffer();
98+
let filenames_buffer = global_file_table.filenames_buffer();
9299

93100
let filenames_size = filenames_buffer.len();
94-
let filenames_val = cx.const_bytes(&filenames_buffer);
95-
let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer);
101+
let filenames_val = cx.const_bytes(filenames_buffer);
102+
let filenames_ref = coverageinfo::hash_bytes(filenames_buffer);
96103

97104
// Generate the LLVM IR representation of the coverage map and store it in a well-known global
98105
let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val);
@@ -139,40 +146,64 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
139146
coverageinfo::save_cov_data_to_mod(cx, cov_data_val);
140147
}
141148

149+
/// Maps "global" (per-CGU) file ID numbers to their underlying filenames.
142150
struct GlobalFileTable {
143-
global_file_table: FxIndexSet<Symbol>,
151+
/// This "raw" table doesn't include the working dir, so a filename's
152+
/// global ID is its index in this set **plus one**.
153+
raw_file_table: FxIndexSet<Symbol>,
154+
filenames_buffer: Vec<u8>,
144155
}
145156

146157
impl GlobalFileTable {
147-
fn new(tcx: TyCtxt<'_>) -> Self {
148-
let mut global_file_table = FxIndexSet::default();
158+
fn new(tcx: TyCtxt<'_>, all_file_names: impl IntoIterator<Item = Symbol>) -> Self {
149159
// LLVM Coverage Mapping Format version 6 (zero-based encoded as 5)
150160
// requires setting the first filename to the compilation directory.
151161
// Since rustc generates coverage maps with relative paths, the
152162
// compilation directory can be combined with the relative paths
153163
// to get absolute paths, if needed.
154164
use rustc_session::RemapFileNameExt;
155-
let working_dir =
156-
Symbol::intern(&tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy());
157-
global_file_table.insert(working_dir);
158-
Self { global_file_table }
159-
}
160-
161-
fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 {
162-
let (global_file_id, _) = self.global_file_table.insert_full(file_name);
163-
global_file_id as u32
164-
}
165+
let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy();
166+
167+
// Prepare a map from filename symbols to their underlying strings, so
168+
// that we can sort by the strings.
169+
let mut raw_file_table = FxIndexMap::<Symbol, &str>::default();
170+
// Filenames usually come in contiguous runs, so dedup to save work.
171+
let all_file_names = all_file_names.into_iter().dedup().collect::<Vec<_>>();
172+
for file_name in &all_file_names {
173+
raw_file_table.entry(*file_name).or_insert_with(|| file_name.as_str());
174+
}
165175

166-
fn into_filenames_buffer(self) -> Vec<u8> {
167-
// This method takes `self` so that the caller can't accidentally
168-
// modify the original file table after encoding it into a buffer.
176+
// Sort the file table by its actual string values, not the arbitrary
177+
// ordering of its symbols.
178+
raw_file_table.sort_unstable_by(|_, a, _, b| str::cmp(a, b));
169179

170-
llvm::build_byte_buffer(|buffer| {
180+
// Build the LLVM filenames buffer ahead of time, so that we can discard
181+
// the string references afterwards.
182+
let filenames_buffer = llvm::build_byte_buffer(|buffer| {
171183
coverageinfo::write_filenames_section_to_buffer(
172-
self.global_file_table.iter().map(Symbol::as_str),
184+
// Insert the working dir at index 0, before the other filenames.
185+
std::iter::once(working_dir).chain(raw_file_table.values().copied()),
173186
buffer,
174187
);
175-
})
188+
});
189+
190+
// Discard the string reference values, leaving only a set of symbols.
191+
let raw_file_table = raw_file_table.into_keys().collect::<FxIndexSet<_>>();
192+
193+
Self { raw_file_table, filenames_buffer }
194+
}
195+
196+
fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 {
197+
let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| {
198+
bug!("file name not found in prepared global file table: {file_name}");
199+
});
200+
// The raw file table doesn't include an entry for the working dir
201+
// (which has ID 0), so add 1 to get the correct ID.
202+
(raw_id + 1) as u32
203+
}
204+
205+
fn filenames_buffer(&self) -> &[u8] {
206+
&self.filenames_buffer
176207
}
177208
}
178209

@@ -182,7 +213,7 @@ impl GlobalFileTable {
182213
///
183214
/// Every filename used by the function must already be present in the
/// prepared global file table; the table is not modified here.
184215
fn encode_mappings_for_function(
185-
global_file_table: &mut GlobalFileTable,
216+
global_file_table: &GlobalFileTable,
186217
function_coverage: &FunctionCoverage<'_>,
187218
) -> Vec<u8> {
188219
let mut counter_regions = function_coverage.counter_regions().collect::<Vec<_>>();
@@ -203,7 +234,7 @@ fn encode_mappings_for_function(
203234
for counter_regions_for_file in
204235
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
205236
{
206-
// Look up (or allocate) the global file ID for this filename.
237+
// Look up the global file ID for this filename.
207238
let file_name = counter_regions_for_file[0].1.file_name;
208239
let global_file_id = global_file_table.global_file_id_for_file_name(file_name);
209240

0 commit comments

Comments
 (0)