Skip to content

Missed optimization with NonNull::new then pointer::wrapping_byte_add(1) then NonNull::new then Option::unwrap then NonNull::len in some situations #120440

Closed
@zachs18

Description

@zachs18

(Title is a mess, but I didn't really find a simpler reproducer)

I tried this code:

use std::ptr::NonNull;

/// Returns the length carried by the raw slice pointer `ptr`, obtained via `NonNull::len`.
///
/// A non-null `ptr` is wrapped in `NonNull` and queried directly. A null `ptr` is first
/// moved one byte forward with `wrapping_byte_add(1)` so that it can be wrapped in a
/// `NonNull`, and the resulting `Option` is `unwrap()`ed before calling `len()`.
///
/// This is the variant for which the optimized output still contains a call to
/// `core::option::unwrap_failed`.
// NOTE(review): `#[inline(never)]` presumably keeps this function as its own symbol so that
// its generated code can be inspected separately — confirm.
#[inline(never)]
pub fn slice_ptr_len_1(ptr: *const [u8]) -> usize {
  // `NonNull::new` takes a `*mut` pointer, hence the cast; nothing is written through it here.
  let ptr = ptr.cast_mut();
  if let Some(ptr) = NonNull::new(ptr) {
    ptr.len()
  } else {
    // We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
    // `unwrap()` is therefore not expected to panic, and `len()` is called directly on
    // the unwrapped `NonNull` inside this branch.
    NonNull::new(ptr.wrapping_byte_add(1)).unwrap().len()
  }
}

/// Returns the length carried by the raw slice pointer `ptr`, obtained via `NonNull::len`.
///
/// Same shape as the `unwrap()`-based variant, except that the null branch falls back to
/// a `NonNull` pointing at an empty slice (`unwrap_or(NonNull::from(&[]))`) instead of
/// unwrapping, so this function contains no panicking call in its source.
// NOTE(review): `#[inline(never)]` presumably keeps this function as its own symbol so that
// its generated code can be inspected separately — confirm.
#[inline(never)]
pub fn slice_ptr_len_2(ptr: *const [u8]) -> usize {
  // `NonNull::new` takes a `*mut` pointer, hence the cast; nothing is written through it here.
  let ptr = ptr.cast_mut();
  if let Some(ptr) = NonNull::new(ptr) {
    ptr.len()
  } else {
    // We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
    // The `unwrap_or` fallback (an empty slice) is therefore not expected to be selected.
    NonNull::new(ptr.wrapping_byte_add(1)).unwrap_or(NonNull::from(&[])).len()
  }
}

/// Returns the length carried by the raw slice pointer `ptr`, obtained via `NonNull::len`.
///
/// Uses the same null handling as the `unwrap()`-based variant, but the conversion to
/// `NonNull<[u8]>` lives in a nested helper and `len()` is called on the helper's result
/// rather than inside each branch.
// NOTE(review): `#[inline(never)]` presumably keeps this function as its own symbol so that
// its generated code can be inspected separately — confirm.
#[inline(never)]
pub fn slice_ptr_len_3(ptr: *const [u8]) -> usize {
    // Converts a possibly-null raw slice pointer into a `NonNull<[u8]>`: a non-null
    // pointer is returned unchanged, a null one is offset by one byte first.
    fn some_nonnull_slice_ptr(ptr: *const [u8]) -> NonNull<[u8]> {
        // `NonNull::new` takes a `*mut` pointer, hence the cast.
        let ptr = ptr.cast_mut();
        if let Some(ptr) = NonNull::new(ptr) {
            ptr
        } else {
            // We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
            NonNull::new(ptr.wrapping_byte_add(1)).unwrap()
        }
    }
    // The only difference from the in-branch variant: `len()` is applied once, here.
    some_nonnull_slice_ptr(ptr).len()
}

// Table of the three function pointers under comparison. In the emitted output, entries
// that refer to the same symbol show which functions were merged into one.
// NOTE(review): `#[used]` presumably keeps this otherwise-unreferenced static in the
// output — confirm against the Rust reference.
#[used]
static _SHOW_DEDUP: [fn(*const [u8]) -> usize; 3] = [
    slice_ptr_len_1,
    slice_ptr_len_2,
    slice_ptr_len_3,
];

(Slice element type u8 is not important. Context: stable polyfill for <*const [T]>::len. slice_ptr_len_3 is essentially just slice_ptr_len_1 but with the .len() factored out into a separate function.)

I expected to see this happen: With optimizations, all three fns compile to roughly the same asm, with no panic or branches.

Instead, this happened: The first has a(n unreachable) panic branch, the second and third do not.

Godbolt link

example::slice_ptr_len_1:
  lea rax, [rdi + 1]
  or rax, rdi
  je .LBB0_2
  mov rax, rsi
  ret
.LBB0_2:
  push rax
  lea rdi, [rip + .L__unnamed_1]
  call qword ptr [rip + core::option::unwrap_failed@GOTPCREL]

example::slice_ptr_len_2:
  mov rax, rsi
  ret

// snip: panic payload etc

example::_SHOW_DEDUP:
  .quad example::slice_ptr_len_1
  .quad example::slice_ptr_len_2
  .quad example::slice_ptr_len_2 // slice_ptr_len_3 is dedup'd to slice_ptr_len_2

(tested on nightly x86_64-unknown-linux-gnu, similar on other versions/targets. // comments mine)

LLVM IR

rustc -OO --emit=llvm-ir

@alloc_9be5c135c0f7c91e35e471f025924b11 = private unnamed_addr constant <{ [15 x i8] }> <{ [15 x i8] c"/app/example.rs" }>, align 1
@alloc_a22f3c8c001f7e83bbc8418ba69a40e0 = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_9be5c135c0f7c91e35e471f025924b11, [16 x i8] c"\0F\00\00\00\00\00\00\00\09\00\00\00,\00\00\00" }>, align 8
@example::_SHOW_DEDUP = constant <{ ptr, ptr, ptr }> <{ ptr @example::slice_ptr_len_1, ptr @example::slice_ptr_len_2, ptr @example::slice_ptr_len_2 }>, align 8
@llvm.compiler.used = appending global [1 x ptr] [ptr @example::_SHOW_DEDUP], section "llvm.metadata"

@example::slice_ptr_len_3 = unnamed_addr alias i64 (ptr, i64), ptr @example::slice_ptr_len_2

define noundef i64 @example::slice_ptr_len_1(ptr noundef readnone %ptr.0, i64 noundef returned %ptr.1) unnamed_addr #0 !dbg !7 {
start:
  %0 = icmp eq ptr %ptr.0, null, !dbg !12
  %1 = getelementptr i8, ptr %ptr.0, i64 1
  %2 = icmp eq ptr %1, null
  %or.cond = and i1 %0, %2, !dbg !23
  br i1 %or.cond, label %bb11, label %bb3, !dbg !23

bb3:
  ret i64 %ptr.1, !dbg !24

bb11:
  tail call void @core::option::unwrap_failed(ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc_a22f3c8c001f7e83bbc8418ba69a40e0) #4, !dbg !25
  unreachable, !dbg !25
}

define noundef i64 @example::slice_ptr_len_2(ptr nocapture readnone %ptr.0, i64 noundef returned %ptr.1) unnamed_addr #1 !dbg !32 {
start:
  ret i64 %ptr.1, !dbg !33
}

define noundef i64 @example::asdf() unnamed_addr #2 !dbg !34 {
start:
  ret i64 4294967295, !dbg !35
}

declare void @core::option::unwrap_failed(ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #3

attributes #0 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { mustprogress nofree noinline norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { cold noinline noreturn nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #4 = { noreturn }

Meta

rustc --version --verbose:

$ rustc +nightly --version --verbose
rustc 1.77.0-nightly (6b4f1c5e7 2024-01-27)
binary: rustc
commit-hash: 6b4f1c5e782c72a047a23e922decd33e7d462345
commit-date: 2024-01-27
host: x86_64-unknown-linux-gnu
release: 1.77.0-nightly
LLVM version: 17.0.6
$ rustc +stable --version --verbose
rustc 1.75.0 (82e1608df 2023-12-21)
binary: rustc
commit-hash: 82e1608dfa6e0b5569232559e3d385fea5a93112
commit-date: 2023-12-21
host: x86_64-unknown-linux-gnu
release: 1.75.0
LLVM version: 17.0.6

(The result is roughly the same on stable and nightly. wrapping_byte_add was stabilized in 1.75.0, so this doesn't compile with stable versions before that.)

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM (Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.) · C-optimization (Category: An issue highlighting optimization opportunities or PRs implementing such) · E-needs-test (Call for participation: An issue has been fixed and does not reproduce, but no test has been added.) · I-slow (Issue: Problems and improvements with respect to performance of generated code.)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions