Description
(Title is a mess, but I didn't really find a simpler reproducer)
I tried this code:
use std::ptr::NonNull;
#[inline(never)]
pub fn slice_ptr_len_1(ptr: *const [u8]) -> usize {
let ptr = ptr.cast_mut();
if let Some(ptr) = NonNull::new(ptr) {
ptr.len()
} else {
// We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
NonNull::new(ptr.wrapping_byte_add(1)).unwrap().len()
}
}
#[inline(never)]
pub fn slice_ptr_len_2(ptr: *const [u8]) -> usize {
let ptr = ptr.cast_mut();
if let Some(ptr) = NonNull::new(ptr) {
ptr.len()
} else {
// We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
NonNull::new(ptr.wrapping_byte_add(1)).unwrap_or(NonNull::from(&[])).len()
}
}
#[inline(never)]
pub fn slice_ptr_len_3(ptr: *const [u8]) -> usize {
fn some_nonnull_slice_ptr(ptr: *const [u8]) -> NonNull<[u8]> {
let ptr = ptr.cast_mut();
if let Some(ptr) = NonNull::new(ptr) {
ptr
} else {
// We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
NonNull::new(ptr.wrapping_byte_add(1)).unwrap()
}
}
some_nonnull_slice_ptr(ptr).len()
}
#[used]
static _SHOW_DEDUP: [fn(*const [u8]) -> usize; 3] = [
slice_ptr_len_1,
slice_ptr_len_2,
slice_ptr_len_3,
];
(Slice element type u8
is not important. Context: stable polyfill for <*const [T]>::len
. slice_ptr_len_3
is essentially just slice_ptr_len_1
but with the .len()
factored out into a separate function.)
I expected to see this happen: With optimizations, all three fns compile to roughly the same asm, with no panic or branches.
Instead, this happened: The first has a(n unreachable) panic branch, the second and third do not.
example::slice_ptr_len_1:
lea rax, [rdi + 1]
or rax, rdi
je .LBB0_2
mov rax, rsi
ret
.LBB0_2:
push rax
lea rdi, [rip + .L__unnamed_1]
call qword ptr [rip + core::option::unwrap_failed@GOTPCREL]
example::slice_ptr_len_2:
mov rax, rsi
ret
// snip: panic payload etc
example::_SHOW_DEDUP:
.quad example::slice_ptr_len_1
.quad example::slice_ptr_len_2
.quad example::slice_ptr_len_2 // slice_ptr_len_3 is dedup'd to slice_ptr_len_2
(tested on nightly x86_64-unknown-linux-gnu
, similar on other versions/targets. //
comments mine)
LLVM IR
rustc -OO --emit=llvm-ir
@alloc_9be5c135c0f7c91e35e471f025924b11 = private unnamed_addr constant <{ [15 x i8] }> <{ [15 x i8] c"/app/example.rs" }>, align 1
@alloc_a22f3c8c001f7e83bbc8418ba69a40e0 = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_9be5c135c0f7c91e35e471f025924b11, [16 x i8] c"\0F\00\00\00\00\00\00\00\09\00\00\00,\00\00\00" }>, align 8
@example::_SHOW_DEDUP = constant <{ ptr, ptr, ptr }> <{ ptr @example::slice_ptr_len_1, ptr @example::slice_ptr_len_2, ptr @example::slice_ptr_len_2 }>, align 8
@llvm.compiler.used = appending global [1 x ptr] [ptr @example::_SHOW_DEDUP], section "llvm.metadata"
@example::slice_ptr_len_3 = unnamed_addr alias i64 (ptr, i64), ptr @example::slice_ptr_len_2
define noundef i64 @example::slice_ptr_len_1(ptr noundef readnone %ptr.0, i64 noundef returned %ptr.1) unnamed_addr #0 !dbg !7 {
start:
%0 = icmp eq ptr %ptr.0, null, !dbg !12
%1 = getelementptr i8, ptr %ptr.0, i64 1
%2 = icmp eq ptr %1, null
%or.cond = and i1 %0, %2, !dbg !23
br i1 %or.cond, label %bb11, label %bb3, !dbg !23
bb3:
ret i64 %ptr.1, !dbg !24
bb11:
tail call void @core::option::unwrap_failed(ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc_a22f3c8c001f7e83bbc8418ba69a40e0) #4, !dbg !25
unreachable, !dbg !25
}
define noundef i64 @example::slice_ptr_len_2(ptr nocapture readnone %ptr.0, i64 noundef returned %ptr.1) unnamed_addr #1 !dbg !32 {
start:
ret i64 %ptr.1, !dbg !33
}
define noundef i64 @example::asdf() unnamed_addr #2 !dbg !34 {
start:
ret i64 4294967295, !dbg !35
}
declare void @core::option::unwrap_failed(ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #3
attributes #0 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { mustprogress nofree noinline norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { cold noinline noreturn nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #4 = { noreturn }
Meta
rustc --version --verbose
:
$ rustc +nightly --version --verbose
rustc 1.77.0-nightly (6b4f1c5e7 2024-01-27)
binary: rustc
commit-hash: 6b4f1c5e782c72a047a23e922decd33e7d462345
commit-date: 2024-01-27
host: x86_64-unknown-linux-gnu
release: 1.77.0-nightly
LLVM version: 17.0.6
$ rustc +stable --version --verbose
rustc 1.75.0 (82e1608df 2023-12-21)
binary: rustc
commit-hash: 82e1608dfa6e0b5569232559e3d385fea5a93112
commit-date: 2023-12-21
host: x86_64-unknown-linux-gnu
release: 1.75.0
LLVM version: 17.0.6
(result is the roughly same on stable and nightly. wrapping_byte_add
was stabilized in 1.75.0, so this doesn't compile with stable versions before that)