Skip to content

Noop-loop is no longer optimized away on Nightly #121239

Closed
@the8472

Description

@the8472

While working on #120682 I noticed that some optimizations regressed after a rebase. The following should be a reduction.

I tried this code:

godbolt

#![feature(ptr_sub_ptr)]
#![feature(unchecked_math)]
#![feature(non_null_convenience)]

use std::ptr::NonNull;
use std::ptr;
use std::mem;

/// A pair of pointers delimiting a run of `T` elements.
///
/// Both places in this example that build an `It` set `end` to
/// `start.add(len)`, i.e. `len` elements past `start`.
struct It<T> {
    /// Non-null pointer to the first element.
    start: NonNull<T>,
    /// Raw pointer used as the end of the run (`start + len` in this example).
    end: *const T,
}

/// Four `u64` fields; `end_offset_copy` produces values of this type by
/// `mem::transmute`-ing a `[u64; 4]`, which only compiles when both types
/// have the same size.
struct Out {
    a: u64,
    b: u64,
    c: u64,
    d: u64,
}

/// Entry point of the reproduction: wraps `ptr`/`len` in an `It<[u64; 4]>`
/// and hands it to `end_offset_copy`, returning the resulting `It<Out>`.
///
/// The assembly listings in the issue are the codegen for this function.
///
/// NOTE(review): the function is not marked `unsafe`, yet it calls
/// `NonNull::new_unchecked` and `NonNull::add` on the caller's pointer, so it
/// presumably relies on `ptr` being non-null and valid for `len` elements —
/// acceptable for a codegen reduction, not for real API surface.
pub fn do_it(ptr: *mut [u64; 4], len: usize) -> It<Out> {
    unsafe {
        let start = NonNull::new_unchecked(ptr);
        // `end` is `len` elements past `start`.
        let mut it = It::<[u64; 4]> {
            start,
            end: start.add(len).as_ptr()
        };
        end_offset_copy(&mut it)
    }
}

/// Copies every `[u64; 4]` element in `a.start..a.end` to the buffer beginning
/// at `a.start` (reinterpreted as `*mut Out`), addressing both source and
/// destination relative to the *end* of the range, then returns an `It<Out>`
/// covering the same `len` elements.
///
/// Source element `k` is read from `a.end - (len - k)` and written to
/// `dest_buf + len - (len - k)`. With `a.end == a.start + len` (as built by
/// `do_it`) and `Out` having the same size as `[u64; 4]`, each iteration reads
/// and writes the same address, which is why the issue calls this a no-op loop
/// that the optimizer is expected to remove.
fn end_offset_copy(a: &mut It<[u64; 4]>) -> It<Out> {
    unsafe {
        // Element count between `start` and `end`.
        let len = a.end.sub_ptr(a.start.as_ptr());

        // Counts down from `len` to 1; `len - i` is the element index being copied.
        let mut i = len;

        // The destination buffer is the source buffer itself, viewed as `Out`s.
        let dest_buf: *mut Out = a.start.as_ptr().cast();

        while i > 0 {
            // Read the element `i` positions before `end`.
            let val = a.end.sub(i).read();
            // Matching destination slot: index `len - i` from `dest_buf`.
            let dst = dest_buf.add(len).sub(i);

            ptr::write(dst, mem::transmute(val));

            // Cannot underflow: the loop condition guarantees `i > 0` here.
            i = i.unchecked_sub(1); 
        }

        let start = NonNull::new_unchecked(dest_buf);

        It::<Out> {
            start,
            end: start.add(len).as_ptr()
        }
    }
}

I expected to see this happen (beta):

example::do_it:
        mov     rax, rdi
        shl     rsi, 5
        lea     rdx, [rsi + rdi]
        ret

Instead, this happened (nightly):

example::do_it:
        mov     rax, rdi
        movabs  rdx, 576460752303423487
        and     rdx, rsi
        test    rsi, rsi
        je      .LBB0_6
        mov     rcx, rsi
        shl     rcx, 5
        mov     rdi, rdx
        test    sil, 1
        je      .LBB0_3
        lea     rsi, [rax + rcx]
        mov     rdi, rdx
        shl     rdi, 5
        sub     rsi, rdi
        movups  xmm0, xmmword ptr [rsi]
        movups  xmm1, xmmword ptr [rsi + 16]
        movups  xmmword ptr [rax + 16], xmm1
        movups  xmmword ptr [rax], xmm0
        lea     rdi, [rdx - 1]
.LBB0_3:
        cmp     rdx, 1
        je      .LBB0_6
        mov     r8, rdi
        shl     r8, 5
        mov     rsi, rax
        sub     rsi, r8
        add     rsi, 32
        mov     r8, rdx
        shl     r8, 5
.LBB0_5:
        movups  xmm0, xmmword ptr [rsi + rcx - 32]
        movups  xmm1, xmmword ptr [rsi + rcx - 16]
        movups  xmmword ptr [rsi + r8 - 16], xmm1
        movups  xmmword ptr [rsi + r8 - 32], xmm0
        movups  xmm0, xmmword ptr [rsi + rcx]
        movups  xmm1, xmmword ptr [rsi + rcx + 16]
        movups  xmmword ptr [rsi + r8 + 16], xmm1
        movups  xmmword ptr [rsi + r8], xmm0
        add     rsi, 64
        add     rdi, -2
        jne     .LBB0_5
.LBB0_6:
        shl     rdx, 5
        add     rdx, rax
        ret

Meta

rustc --version --verbose:

rustc 1.78.0-nightly (bccb9bbb4 2024-02-16)
binary: rustc
commit-hash: bccb9bbb418a30aeb332052e721beb6ebc6b1ce7
commit-date: 2024-02-16
host: x86_64-unknown-linux-gnu
release: 1.78.0-nightly
LLVM version: 18.1.0
Compiler returned: 0

Metadata

Metadata

Assignees

Labels

- A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
- A-codegen — Area: Code generation
- C-bug — Category: This is a bug.
- I-slow — Issue: Problems and improvements with respect to performance of generated code.
- P-medium — Medium priority
- S-has-mcve — Status: A Minimal Complete and Verifiable Example has been found for this issue
- S-needs-repro — Status: This issue has no reproduction and needs a reproduction to make progress.
- T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
- llvm-fixed-upstream — Issue expected to be fixed by the next major LLVM upgrade, or backported fixes
- regression-from-stable-to-nightly — Performance or correctness regression from stable to nightly.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions