Skip to content

Commit 3a43a65

Browse files
committed
Replace the nm symbol check with a Rust implementation
This should be less error-prone and adaptable than the `nm` version, and have better cross-platform support without needing LLVM `nm` installed.
1 parent 8cd12b3 commit 3a43a65

File tree

4 files changed

+262
-108
lines changed

4 files changed

+262
-108
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ members = [
66
"crates/libm-macros",
77
"crates/musl-math-sys",
88
"crates/panic-handler",
9+
"crates/symbol-check",
910
"crates/util",
1011
"libm",
1112
"libm-test",

ci/run.sh

Lines changed: 17 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -47,87 +47,25 @@ else
4747
fi
4848
fi
4949

50-
51-
declare -a rlib_paths
52-
53-
# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs
54-
update_rlib_paths() {
55-
if [ -d /builtins-target ]; then
56-
rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
57-
else
58-
rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib )
59-
fi
60-
}
61-
62-
# Remove any existing artifacts from previous tests that don't set #![compiler_builtins]
63-
update_rlib_paths
64-
rm -f "${rlib_paths[@]}"
65-
66-
cargo build -p compiler_builtins --target "$target"
67-
cargo build -p compiler_builtins --target "$target" --release
68-
cargo build -p compiler_builtins --target "$target" --features c
69-
cargo build -p compiler_builtins --target "$target" --features c --release
70-
cargo build -p compiler_builtins --target "$target" --features no-asm
71-
cargo build -p compiler_builtins --target "$target" --features no-asm --release
72-
cargo build -p compiler_builtins --target "$target" --features no-f16-f128
73-
cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release
74-
75-
PREFIX=${target//unknown-/}-
76-
case "$target" in
77-
armv7-*)
78-
PREFIX=arm-linux-gnueabihf-
79-
;;
80-
thumb*)
81-
PREFIX=arm-none-eabi-
82-
;;
83-
*86*-*)
84-
PREFIX=
85-
;;
86-
esac
87-
88-
NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) )
89-
if [ "$NM" = "" ]; then
90-
NM="${PREFIX}nm"
91-
fi
92-
93-
# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with
94-
# rustup run to ensure that those are in PATH.
95-
TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')"
96-
if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then
97-
NM="rustup run $TOOLCHAIN $NM"
98-
fi
99-
100-
# Look out for duplicated symbols when we include the compiler-rt (C) implementation
101-
update_rlib_paths
102-
for rlib in "${rlib_paths[@]}"; do
103-
set +x
104-
echo "================================================================"
105-
echo "checking $rlib for duplicate symbols"
106-
echo "================================================================"
107-
set -x
108-
109-
duplicates_found=0
110-
111-
# NOTE On i586, It's normal that the get_pc_thunk symbol appears several
112-
# times so ignore it
113-
$NM -g --defined-only "$rlib" 2>&1 |
114-
sort |
115-
uniq -d |
116-
grep -v __x86.get_pc_thunk --quiet |
117-
grep 'T __' && duplicates_found=1
118-
119-
if [ "$duplicates_found" != 0 ]; then
120-
echo "error: found duplicate symbols"
121-
exit 1
122-
else
123-
echo "success; no duplicate symbols found"
124-
fi
125-
done
126-
127-
rm -f "${rlib_paths[@]}"
50+
# Ensure there are no duplicate symbols or references to `core` when
51+
# `compiler-builtins` is built with various features. Symcheck invokes Cargo to
52+
# build with the arguments we provide it, then validates the built artifacts.
53+
symcheck=(cargo run -p symbol-check --release)
54+
[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm)
55+
symcheck+=(-- build-and-check)
56+
57+
"${symcheck[@]}" -p compiler_builtins --target "$target"
58+
"${symcheck[@]}" -p compiler_builtins --target "$target" --release
59+
"${symcheck[@]}" -p compiler_builtins --target "$target" --features c
60+
"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release
61+
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm
62+
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release
63+
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128
64+
"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release
12865

12966
build_intrinsics_test() {
130-
cargo build \
67+
# symcheck also checks the results of builtins-test-intrinsics
68+
"${symcheck[@]}" \
13169
--target "$target" --verbose \
13270
--manifest-path builtins-test-intrinsics/Cargo.toml "$@"
13371
}
@@ -143,35 +81,6 @@ build_intrinsics_test --features c --release
14381
CARGO_PROFILE_DEV_LTO=true build_intrinsics_test
14482
CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release
14583

146-
# Ensure no references to any symbols from core
147-
update_rlib_paths
148-
for rlib in "${rlib_paths[@]}"; do
149-
set +x
150-
echo "================================================================"
151-
echo "checking $rlib for references to core"
152-
echo "================================================================"
153-
set -x
154-
155-
tmpdir="${CARGO_TARGET_DIR:-target}/tmp"
156-
test -d "$tmpdir" || mkdir "$tmpdir"
157-
defined="$tmpdir/defined_symbols.txt"
158-
undefined="$tmpdir/defined_symbols.txt"
159-
160-
$NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined"
161-
$NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined"
162-
grep_has_results=0
163-
grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1
164-
165-
if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then
166-
echo "FIXME: powerpc64 fails these tests"
167-
elif [ "$grep_has_results" != 0 ]; then
168-
echo "error: found unexpected references to core"
169-
exit 1
170-
else
171-
echo "success; no references to core found"
172-
fi
173-
done
174-
17584
# Test libm
17685

17786
# Make sure a simple build works

crates/symbol-check/Cargo.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
[package]
2+
name = "symbol-check"
3+
version = "0.1.0"
4+
edition = "2024"
5+
publish = false
6+
7+
[dependencies]
8+
# FIXME: used as a git dependency since the latest release does not support wasm
9+
object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" }
10+
serde_json = "1.0.140"
11+
12+
[features]
13+
wasm = ["object/wasm"]

crates/symbol-check/src/main.rs

Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any
2+
//! linking errors.
3+
4+
use std::collections::{BTreeMap, BTreeSet};
5+
use std::fs;
6+
use std::io::{BufRead, BufReader};
7+
use std::path::{Path, PathBuf};
8+
use std::process::{Command, Stdio};
9+
10+
use object::read::archive::{ArchiveFile, ArchiveMember};
11+
use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection};
12+
use serde_json::Value;
13+
14+
const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"];
15+
const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None];
16+
17+
const USAGE: &str = "Usage:
18+
19+
symbol-check build-and-check CARGO_ARGS ...
20+
21+
Cargo will get invoked with `CARGO_ARGS` and all output
22+
`compiler_builtins*.rlib` files will be checked.
23+
";
24+
25+
fn main() {
26+
// Create a `&str` vec so we can match on it.
27+
let args = std::env::args().collect::<Vec<_>>();
28+
let args_ref = args.iter().map(String::as_str).collect::<Vec<_>>();
29+
30+
match &args_ref[1..] {
31+
["build-and-check", rest @ ..] if !rest.is_empty() => {
32+
let paths = exec_cargo_with_args(rest);
33+
for path in paths {
34+
println!("Checking {}", path.display());
35+
verify_no_duplicates(&path);
36+
verify_core_symbols(&path);
37+
}
38+
}
39+
_ => {
40+
println!("{USAGE}");
41+
std::process::exit(1);
42+
}
43+
}
44+
}
45+
46+
/// Run `cargo build` with the provided additional arguments, collecting the list of created
47+
/// libraries.
48+
fn exec_cargo_with_args(args: &[&str]) -> Vec<PathBuf> {
49+
let mut cmd = Command::new("cargo")
50+
.arg("build")
51+
.arg("--message-format=json")
52+
.args(args)
53+
.stdout(Stdio::piped())
54+
.spawn()
55+
.expect("failed to launch Cargo");
56+
57+
let stdout = cmd.stdout.take().unwrap();
58+
let reader = BufReader::new(stdout);
59+
let mut check_files = Vec::new();
60+
61+
for line in reader.lines() {
62+
let line = line.expect("failed to read line");
63+
println!("{line}"); // tee to stdout
64+
65+
// Select only steps that create files
66+
let j: Value = serde_json::from_str(&line).expect("failed to deserialize");
67+
if j["reason"] != "compiler-artifact" {
68+
continue;
69+
}
70+
71+
// Find rlibs in the created file list that match our expected library names and
72+
// extensions.
73+
for fpath in j["filenames"].as_array().expect("filenames not an array") {
74+
let path = fpath.as_str().expect("file name not a string");
75+
let path = PathBuf::from(path);
76+
77+
if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) {
78+
let fname = path.file_name().unwrap().to_str().unwrap();
79+
80+
if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) {
81+
check_files.push(path);
82+
}
83+
}
84+
}
85+
}
86+
87+
cmd.wait().expect("failed to wait on Cargo");
88+
89+
assert!(!check_files.is_empty(), "no compiler_builtins rlibs found");
90+
println!("Collected the following rlibs to check: {check_files:#?}");
91+
92+
check_files
93+
}
94+
95+
/// Information collected from `object`, for convenience.
96+
#[expect(unused)] // only for printing
97+
#[derive(Clone, Debug)]
98+
struct SymInfo {
99+
name: String,
100+
kind: SymbolKind,
101+
scope: SymbolScope,
102+
section: SymbolSection,
103+
is_undefined: bool,
104+
is_global: bool,
105+
is_local: bool,
106+
is_weak: bool,
107+
is_common: bool,
108+
address: u64,
109+
object: String,
110+
}
111+
112+
impl SymInfo {
113+
fn new(sym: &Symbol, member: &ArchiveMember) -> Self {
114+
Self {
115+
name: sym.name().expect("missing name").to_owned(),
116+
kind: sym.kind(),
117+
scope: sym.scope(),
118+
section: sym.section(),
119+
is_undefined: sym.is_undefined(),
120+
is_global: sym.is_global(),
121+
is_local: sym.is_local(),
122+
is_weak: sym.is_weak(),
123+
is_common: sym.is_common(),
124+
address: sym.address(),
125+
object: String::from_utf8_lossy(member.name()).into_owned(),
126+
}
127+
}
128+
}
129+
130+
/// Ensure that the same global symbol isn't defined in multiple object files within an archive.
131+
///
132+
/// Note that this will also locate cases where a symbol is weakly defined in more than one place.
133+
/// Technically there are no linker errors that will come from this, but it keeps our binary more
134+
/// straightforward and saves some distribution size.
135+
fn verify_no_duplicates(path: &Path) {
136+
let mut syms = BTreeMap::<String, SymInfo>::new();
137+
let mut dups = Vec::new();
138+
let mut found_any = false;
139+
140+
for_each_symbol(path, |symbol, member| {
141+
// Only check defined globals
142+
if !symbol.is_global() || symbol.is_undefined() {
143+
return;
144+
}
145+
146+
let sym = SymInfo::new(&symbol, member);
147+
148+
// x86-32 includes multiple copies of thunk symbols
149+
if sym.name.starts_with("__x86.get_pc_thunk") {
150+
return;
151+
}
152+
153+
// Windows has symbols for literal numeric constants, string literals, and MinGW pseudo-
154+
// relocations. These are allowed to have repeated definitions.
155+
let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"];
156+
if win_allowed_dup_pfx
157+
.iter()
158+
.any(|pfx| sym.name.starts_with(pfx))
159+
{
160+
return;
161+
}
162+
163+
match syms.get(&sym.name) {
164+
Some(existing) => {
165+
dups.push(sym);
166+
dups.push(existing.clone());
167+
}
168+
None => {
169+
syms.insert(sym.name.clone(), sym);
170+
}
171+
}
172+
173+
found_any = true;
174+
});
175+
176+
assert!(found_any, "no symbols found");
177+
178+
if !dups.is_empty() {
179+
dups.sort_unstable_by(|a, b| a.name.cmp(&b.name));
180+
panic!("found duplicate symbols: {dups:#?}");
181+
}
182+
183+
println!(" success: no duplicate symbols found");
184+
}
185+
186+
/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined.
187+
fn verify_core_symbols(path: &Path) {
188+
let mut defined = BTreeSet::new();
189+
let mut undefined = Vec::new();
190+
let mut has_symbols = false;
191+
192+
for_each_symbol(path, |symbol, member| {
193+
has_symbols = true;
194+
195+
// Find only symbols from `core`
196+
if !symbol.name().unwrap().contains("_ZN4core") {
197+
return;
198+
}
199+
200+
let sym = SymInfo::new(&symbol, member);
201+
if sym.is_undefined {
202+
undefined.push(sym);
203+
} else {
204+
defined.insert(sym.name);
205+
}
206+
});
207+
208+
assert!(has_symbols, "no symbols found");
209+
210+
// Discard any symbols that are defined somewhere in the archive
211+
undefined.retain(|sym| !defined.contains(&sym.name));
212+
213+
if !undefined.is_empty() {
214+
undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name));
215+
panic!("found undefined symbols from core: {undefined:#?}");
216+
}
217+
218+
println!(" success: no undefined references to core found");
219+
}
220+
221+
/// For a given archive path, do something with each symbol.
222+
fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) {
223+
let data = fs::read(path).expect("reading file failed");
224+
let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed");
225+
for member in archive.members() {
226+
let member = member.expect("failed to access member");
227+
let obj_data = member.data(&*data).expect("failed to access object");
228+
let obj = object::File::parse(obj_data).expect("failed to parse object");
229+
obj.symbols().for_each(|sym| f(sym, &member));
230+
}
231+
}

0 commit comments

Comments
 (0)