Skip to content

Compiler fails to optimize out bounds checks on 32-bit x86 and ARM #22924

Closed
@rprichard

Description

@rprichard

Test case:

/// Stores 17 at index 40 of `dst` when the slice holds at least 41 bytes;
/// shorter slices are left untouched.
///
/// The explicit length test already implies `40 < dst.len()`, so the bounds
/// check implied by `dst[40]` is redundant inside the branch. This is the
/// check the report expects the optimizer to remove.
// `inline(never)` asks the compiler to keep this as a standalone function,
// which is what makes its disassembly inspectable on its own.
#[inline(never)]
pub fn my_write(dst: &mut [u8]) {
    if dst.len() >= 41 {
        dst[40] = 17;
    }
}

// Calls `my_write` on a zero-filled 41-byte array, the shortest length that
// satisfies its `len() >= 41` test.
fn main() {
    let mut buf = [0u8; 41];
    my_write(&mut buf);
}

The 32-bit x86 and ARM disassembly for my_write includes two checks of the slice length and a call to panic_bounds_check. The bounds check is optimized away on x86_64.

Here's the x86 assembly:

# 32-bit x86 (AT&T syntax) code emitted for my_write.
# %ecx points at the slice descriptor: data pointer at offset 0, length at
# offset 4 (matching the { i8*, i32 } argument in the LLVM IR for this symbol).
_ZN8my_write20he7b6161ce2fc6d9beaaE:
    # Reserve 12 bytes: room for the three 4-byte arguments stored at .LBB0_4.
    subl    $12, %esp
    # First length check (the source-level `dst.len() >= 41`): skip the write
    # and return when length < 41 (unsigned).
    cmpl    $41, 4(%ecx)
    jb  .LBB0_3
    # Second length check: the length is reloaded and compared with 41 again.
    # This is the index bounds check the report says should have been removed.
    movl    4(%ecx), %eax
    cmpl    $41, %eax
    jb  .LBB0_4
    # Load the data pointer and store the byte 17 at offset 40.
    movl    (%ecx), %eax
    movb    $17, 40(%eax)
.LBB0_3:
    # Common exit: release the reserved stack space and return.
    addl    $12, %esp
    retl
.LBB0_4:
    # Bounds-check failure path: store the length (%eax), the index 40 and the
    # address of panic_bounds_check_loc20 on the stack, then call
    # panic_bounds_check. No instruction follows the call in this listing.
    movl    %eax, 8(%esp)
    movl    $40, 4(%esp)
    movl    $panic_bounds_check_loc20, (%esp)
    calll   _ZN9panicking18panic_bounds_check20hdb38771954ce4aaf58rE

LLVM IR (textual form):

; LLVM IR for my_write. The single argument %0 points at the slice
; descriptor { i8*, i32 } (data pointer, length).
; Function Attrs: noinline uwtable
define internal fastcc void @_ZN8my_write20he7b6161ce2fc6d9beaaE({ i8*, i32 }* noalias nocapture dereferenceable(8)) unnamed_addr #0 {
entry-block:
  %1 = bitcast { i8*, i32 }* %0 to i8*
  ; Both descriptor fields are read with ONE 64-bit load ...
  %2 = bitcast { i8*, i32 }* %0 to i64*
  %3 = load i64* %2, align 4
  ; ... then the upper 32 bits are extracted and used as the length.
  %.sroa.3.0.extract.shift.i.i = lshr i64 %3, 32
  %.sroa.3.0.extract.trunc.i.i = trunc i64 %.sroa.3.0.extract.shift.i.i to i32
  ; First check: length > 40, i.e. the source-level `dst.len() >= 41`.
  %4 = icmp ugt i32 %.sroa.3.0.extract.trunc.i.i, 40
  ; The lower 32 bits of the same load become the data pointer.
  %5 = trunc i64 %3 to i32
  %6 = inttoptr i32 %5 to i8*
  br i1 %4, label %then-block-17-, label %next-block

then-block-17-:                                   ; preds = %entry-block
  ; The length is loaded AGAIN, this time as a separate i32 load of field 1,
  ; so %8 is a different SSA value from the one tested in %4.
  ; NOTE(review): presumably this mismatch (i64 load + shift/trunc vs. plain
  ; i32 load) is why the two checks are not recognized as equivalent -- confirm.
  %7 = getelementptr inbounds { i8*, i32 }* %0, i32 0, i32 1
  %8 = load i32* %7, align 4
  ; Second check: length < 41 branches to the bounds-check failure block.
  %9 = icmp ult i32 %8, 41
  br i1 %9, label %cond, label %next, !prof !0

next:                                             ; preds = %then-block-17-
  ; In-bounds path: store 17 at byte offset 40 from the data pointer.
  %10 = getelementptr inbounds i8* %6, i32 40
  store i8 17, i8* %10, align 1
  br label %next-block

cond:                                             ; preds = %then-block-17-
  ; Failure path: call panic_bounds_check with the location, index 40 and the
  ; reloaded length; control does not continue past the call (unreachable).
  tail call void @_ZN9panicking18panic_bounds_check20hdb38771954ce4aaf58rE({ %str_slice, i32 }* noalias readonly dereferenceable(12) @panic_bounds_check_loc20, i32 40, i32 %8)
  unreachable

next-block:                                       ; preds = %entry-block, %next
  tail call void @llvm.lifetime.end(i64 8, i8* %1)
  ret void
}

Compiler version:

rustc 1.0.0-nightly (e233987ce 2015-02-27) (built 2015-02-28)
binary: rustc
commit-hash: e233987ce1de88a48db2ce612019ba644d3cf5dd
commit-date: 2015-02-27
build-date: 2015-02-28
host: i686-unknown-linux-gnu
release: 1.0.0-nightly

My ARM compiler is older -- 2015-02-18 or so.

Metadata

Metadata

Assignees

Labels

A-codegen — Area: Code generation; I-slow — Issue: Problems and improvements with respect to performance of generated code.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions