Skip to content

Commit f64372b

Browse files
committed
🍒 Add tool src/tools/coverage-dump for use by some new coverage tests
1 parent 39e0749 commit f64372b

File tree

11 files changed

+500
-1
lines changed

11 files changed

+500
-1
lines changed

Cargo.lock

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,18 @@ version = "0.8.4"
703703
source = "registry+https://github.com/rust-lang/crates.io-index"
704704
checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
705705

706+
[[package]]
707+
name = "coverage-dump"
708+
version = "0.1.0"
709+
dependencies = [
710+
"anyhow",
711+
"leb128",
712+
"md-5",
713+
"miniz_oxide",
714+
"regex",
715+
"rustc-demangle",
716+
]
717+
706718
[[package]]
707719
name = "coverage_test_macros"
708720
version = "0.0.0"
@@ -2009,6 +2021,12 @@ version = "1.3.0"
20092021
source = "registry+https://github.com/rust-lang/crates.io-index"
20102022
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
20112023

2024+
[[package]]
2025+
name = "leb128"
2026+
version = "0.2.5"
2027+
source = "registry+https://github.com/rust-lang/crates.io-index"
2028+
checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
2029+
20122030
[[package]]
20132031
name = "libc"
20142032
version = "0.2.147"

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ members = [
4343
"src/tools/generate-windows-sys",
4444
"src/tools/rustdoc-gui-test",
4545
"src/tools/opt-dist",
46+
"src/tools/coverage-dump",
4647
]
4748

4849
exclude = [

src/bootstrap/builder.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,8 @@ impl<'a> Builder<'a> {
703703
llvm::Lld,
704704
llvm::CrtBeginEnd,
705705
tool::RustdocGUITest,
706-
tool::OptimizedDist
706+
tool::OptimizedDist,
707+
tool::CoverageDump,
707708
),
708709
Kind::Check | Kind::Clippy | Kind::Fix => describe!(
709710
check::Std,

src/bootstrap/tool.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,7 @@ bootstrap_tool!(
306306
GenerateWindowsSys, "src/tools/generate-windows-sys", "generate-windows-sys";
307307
RustdocGUITest, "src/tools/rustdoc-gui-test", "rustdoc-gui-test", is_unstable_tool = true, allow_features = "test";
308308
OptimizedDist, "src/tools/opt-dist", "opt-dist";
309+
CoverageDump, "src/tools/coverage-dump", "coverage-dump";
309310
);
310311

311312
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)]

src/tools/coverage-dump/Cargo.toml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
[package]
2+
name = "coverage-dump"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
7+
8+
[dependencies]
9+
anyhow = "1.0.71"
10+
leb128 = "0.2.5"
11+
md5 = { package = "md-5" , version = "0.10.5" }
12+
miniz_oxide = "0.7.1"
13+
regex = "1.8.4"
14+
rustc-demangle = "0.1.23"

src/tools/coverage-dump/README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
This tool extracts coverage mapping information from an LLVM IR assembly file
2+
(`.ll`), and prints it in a more human-readable form that can be used for
3+
snapshot tests.
4+
5+
The output format is mostly arbitrary, so it's OK to change the output as long
6+
as any affected tests are also re-blessed. However, the output should be
7+
consistent across different executions on different platforms, so avoid
8+
printing any information that is platform-specific or non-deterministic.

src/tools/coverage-dump/src/covfun.rs

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
use crate::parser::{unescape_llvm_string_contents, Parser};
2+
use anyhow::{anyhow, Context};
3+
use regex::Regex;
4+
use std::collections::HashMap;
5+
use std::fmt::{self, Debug};
6+
use std::sync::OnceLock;
7+
8+
pub(crate) fn dump_covfun_mappings(
9+
llvm_ir: &str,
10+
function_names: &HashMap<u64, String>,
11+
) -> anyhow::Result<()> {
12+
// Extract function coverage entries from the LLVM IR assembly, and associate
13+
// each entry with its (demangled) name.
14+
let mut covfun_entries = llvm_ir
15+
.lines()
16+
.filter_map(covfun_line_data)
17+
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
18+
.collect::<Vec<_>>();
19+
covfun_entries.sort_by(|a, b| {
20+
// Sort entries primarily by name, to help make the order consistent
21+
// across platforms and relatively insensitive to changes.
22+
// (Sadly we can't use `sort_by_key` because we would need to return references.)
23+
Ord::cmp(&a.0, &b.0)
24+
.then_with(|| Ord::cmp(&a.1.is_used, &b.1.is_used))
25+
.then_with(|| Ord::cmp(a.1.payload.as_slice(), b.1.payload.as_slice()))
26+
});
27+
28+
for (name, line_data) in &covfun_entries {
29+
let name = name.unwrap_or("(unknown)");
30+
let unused = if line_data.is_used { "" } else { " (unused)" };
31+
println!("Function name: {name}{unused}");
32+
33+
let payload: &[u8] = &line_data.payload;
34+
println!("Raw bytes ({len}): 0x{payload:02x?}", len = payload.len());
35+
36+
let mut parser = Parser::new(payload);
37+
38+
let num_files = parser.read_uleb128_u32()?;
39+
println!("Number of files: {num_files}");
40+
41+
for i in 0..num_files {
42+
let global_file_id = parser.read_uleb128_u32()?;
43+
println!("- file {i} => global file {global_file_id}");
44+
}
45+
46+
let num_expressions = parser.read_uleb128_u32()?;
47+
println!("Number of expressions: {num_expressions}");
48+
49+
for i in 0..num_expressions {
50+
let lhs = parser.read_simple_operand()?;
51+
let rhs = parser.read_simple_operand()?;
52+
println!("- expression {i} operands: lhs = {lhs:?}, rhs = {rhs:?}");
53+
}
54+
55+
for i in 0..num_files {
56+
let num_mappings = parser.read_uleb128_u32()?;
57+
println!("Number of file {i} mappings: {num_mappings}");
58+
59+
for _ in 0..num_mappings {
60+
let (kind, region) = parser.read_mapping_kind_and_region()?;
61+
println!("- {kind:?} at {region:?}");
62+
}
63+
}
64+
65+
parser.ensure_empty()?;
66+
println!();
67+
}
68+
Ok(())
69+
}
70+
71+
struct CovfunLineData {
72+
name_hash: u64,
73+
is_used: bool,
74+
payload: Vec<u8>,
75+
}
76+
77+
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
78+
/// entry, and if so extracts relevant data in a `CovfunLineData`.
79+
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
80+
let re = {
81+
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
82+
// rather than the section name, because the section name is harder to
83+
// extract and differs across Linux/Windows/macOS. We also extract the
84+
// symbol name hash from the variable name rather than the data, since
85+
// it's easier and both should match.
86+
static RE: OnceLock<Regex> = OnceLock::new();
87+
RE.get_or_init(|| {
88+
Regex::new(
89+
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
90+
)
91+
.unwrap()
92+
})
93+
};
94+
95+
let captures = re.captures(line)?;
96+
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
97+
let is_used = captures.name("is_used").is_some();
98+
let payload = unescape_llvm_string_contents(&captures["payload"]);
99+
100+
Some(CovfunLineData { name_hash, is_used, payload })
101+
}
102+
103+
// Extra parser methods only needed when parsing `covfun` payloads.
104+
impl<'a> Parser<'a> {
105+
fn read_simple_operand(&mut self) -> anyhow::Result<Operand> {
106+
let raw_operand = self.read_uleb128_u32()?;
107+
Operand::decode(raw_operand).context("decoding operand")
108+
}
109+
110+
fn read_mapping_kind_and_region(&mut self) -> anyhow::Result<(MappingKind, MappingRegion)> {
111+
let mut kind = self.read_raw_mapping_kind()?;
112+
let mut region = self.read_raw_mapping_region()?;
113+
114+
const HIGH_BIT: u32 = 1u32 << 31;
115+
if region.end_column & HIGH_BIT != 0 {
116+
region.end_column &= !HIGH_BIT;
117+
kind = match kind {
118+
MappingKind::Code(operand) => MappingKind::Gap(operand),
119+
// LLVM's coverage mapping reader will actually handle this
120+
// case without complaint, but the result is almost certainly
121+
// a meaningless implementation artifact.
122+
_ => return Err(anyhow!("unexpected base kind for gap region: {kind:?}")),
123+
}
124+
}
125+
126+
Ok((kind, region))
127+
}
128+
129+
fn read_raw_mapping_kind(&mut self) -> anyhow::Result<MappingKind> {
130+
let raw_mapping_kind = self.read_uleb128_u32()?;
131+
if let Some(operand) = Operand::decode(raw_mapping_kind) {
132+
return Ok(MappingKind::Code(operand));
133+
}
134+
135+
assert_eq!(raw_mapping_kind & 0b11, 0);
136+
assert_ne!(raw_mapping_kind, 0);
137+
138+
let (high, is_expansion) = (raw_mapping_kind >> 3, raw_mapping_kind & 0b100 != 0);
139+
if is_expansion {
140+
Ok(MappingKind::Expansion(high))
141+
} else {
142+
match high {
143+
0 => unreachable!("zero kind should have already been handled as a code mapping"),
144+
2 => Ok(MappingKind::Skip),
145+
4 => {
146+
let true_ = self.read_simple_operand()?;
147+
let false_ = self.read_simple_operand()?;
148+
Ok(MappingKind::Branch { true_, false_ })
149+
}
150+
_ => Err(anyhow!("unknown mapping kind: {raw_mapping_kind:#x}")),
151+
}
152+
}
153+
}
154+
155+
fn read_raw_mapping_region(&mut self) -> anyhow::Result<MappingRegion> {
156+
let start_line_offset = self.read_uleb128_u32()?;
157+
let start_column = self.read_uleb128_u32()?;
158+
let end_line_offset = self.read_uleb128_u32()?;
159+
let end_column = self.read_uleb128_u32()?;
160+
Ok(MappingRegion { start_line_offset, start_column, end_line_offset, end_column })
161+
}
162+
}
163+
164+
/// Represents an expression operand (lhs/rhs), branch region operand
/// (true/false), or the value used by a code region or gap region.
///
/// The payload of `Counter` and `Expression` is the raw value with its
/// two-bit tag shifted out.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Operand {
    /// Raw value `0`.
    Zero,
    /// Tag `0b01`.
    Counter(u32),
    /// Tag `0b10` (subtraction) or `0b11` (addition).
    Expression(u32, Op),
}

/// Operator (addition or subtraction) used by an expression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Op {
    Sub,
    Add,
}

impl Operand {
    /// Decodes a raw operand value: the low two bits are a tag and the
    /// remaining high bits are the payload.
    ///
    /// Returns `None` when the tag is `0b00` but the high bits are non-zero.
    pub(crate) fn decode(input: u32) -> Option<Self> {
        let (high, tag) = (input >> 2, input & 0b11);
        match tag {
            0b00 if high == 0 => Some(Self::Zero),
            0b01 => Some(Self::Counter(high)),
            0b10 => Some(Self::Expression(high, Op::Sub)),
            0b11 => Some(Self::Expression(high, Op::Add)),
            // When reading expression or branch operands, the LLVM coverage
            // mapping reader will always interpret a `0b00` tag as a zero
            // operand, even when the high bits are non-zero.
            // We treat that case as failure instead, so that this code can be
            // shared by the full mapping-kind reader as well.
            _ => None,
        }
    }
}

/// The kind of a single coverage mapping, as decoded from a `covfun` payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MappingKind {
    /// Code region whose value is the given operand.
    Code(Operand),
    /// Gap region: a code mapping whose region had the gap flag set.
    Gap(Operand),
    /// Expansion mapping, carrying the high bits of the raw kind value.
    Expansion(u32),
    /// Skipped region.
    Skip,
    /// Branch region with its true and false operands.
    Branch {
        // NOTE(review): presumably these fields are only read through the
        // `Debug` output, which the dead-code lint does not count — confirm.
        #[allow(dead_code)]
        true_: Operand,
        #[allow(dead_code)]
        false_: Operand,
    },
}
211+
212+
/// Source region covered by one mapping, as stored in a `covfun` payload.
#[derive(Clone, Copy, PartialEq, Eq)]
struct MappingRegion {
    /// Offset of this region's start line, relative to the *start line* of
    /// the *previous mapping* (or 0). Line numbers are 1-based.
    start_line_offset: u32,
    /// This region's start column, absolute and 1-based.
    start_column: u32,
    /// Offset of this region's end line, relative to *this mapping's*
    /// start line. Line numbers are 1-based.
    end_line_offset: u32,
    /// This region's end column, absolute, 1-based, and exclusive.
    ///
    /// If the highest bit is set, that bit is cleared and the associated
    /// mapping becomes a gap region mapping.
    end_column: u32,
}

impl Debug for MappingRegion {
    /// Formats the region as `(prev + L, C) to (start + L, C)`, which makes
    /// it explicit that both line values are offsets rather than absolute
    /// line numbers.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { start_line_offset, start_column, end_line_offset, end_column } = self;
        write!(
            f,
            "(prev + {}, {}) to (start + {}, {})",
            start_line_offset, start_column, end_line_offset, end_column
        )
    }
}

src/tools/coverage-dump/src/main.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
mod covfun;
2+
mod parser;
3+
mod prf_names;
4+
5+
fn main() -> anyhow::Result<()> {
6+
use anyhow::Context as _;
7+
8+
let args = std::env::args().collect::<Vec<_>>();
9+
10+
let llvm_ir_path = args.get(1).context("LLVM IR file not specified")?;
11+
let llvm_ir = std::fs::read_to_string(llvm_ir_path).context("couldn't read LLVM IR file")?;
12+
13+
let function_names = crate::prf_names::make_function_names_table(&llvm_ir)?;
14+
crate::covfun::dump_covfun_mappings(&llvm_ir, &function_names)?;
15+
16+
Ok(())
17+
}

0 commit comments

Comments
 (0)