Open
Description
I tried this code:
pub fn parse_u32_digit_u32_if(acc: u32, byte: u8) -> Option<u32> {
let first_invalid = if acc < 429496729 {
10
} else if acc > 429496729 {
0
} else {
6
};
let byte = byte as u32;
let sub = if acc > 429496729 { byte } else { b'0' as u32 };
let v = byte.wrapping_sub(sub);
(v < first_invalid).then_some(acc.wrapping_mul(10).wrapping_add(v))
}
pub fn parse_u32_digit_u32_match(acc: u32, byte: u8) -> Option<u32> {
let first_invalid = match acc.cmp(&429496729) {
std::cmp::Ordering::Less => 10,
std::cmp::Ordering::Equal => 6,
std::cmp::Ordering::Greater => 0,
};
let byte = byte as u32;
let sub = if acc > 429496729 { byte } else { b'0' as u32 };
let v = byte.wrapping_sub(sub);
(v < first_invalid).then_some(acc.wrapping_mul(10).wrapping_add(v))
}
I expected to see this happen: assembly for the second to be like the first and just use immediates:
parse_u32_digit_u32_if:
xor eax, eax
cmp edi, 429496729
mov ecx, 6
cmovne ecx, eax
mov edx, 10
cmovae edx, ecx
movzx ecx, sil
add ecx, -48
cmp edi, 429496730
cmovae ecx, eax
xor eax, eax
cmp ecx, edx
setb al
lea edx, [rdi + 4*rdi]
lea edx, [rcx + 2*rdx]
ret
Instead, this happened: the following assembly, 20 bytes larger (54 to 74 bytes) and one instruction longer:
parse_u32_digit_u32_match:
xor ecx, ecx
cmp edi, 429496729
setne cl
inc ecx
xor eax, eax
cmp edi, 429496729
cmovb ecx, eax
lea rdx, [rip + .Lswitch.table.parse_u32_digit_u32_match]
movzx esi, sil
add esi, -48
cmp edi, 429496730
cmovae esi, eax
xor eax, eax
cmp esi, dword ptr [rdx + 4*rcx]
setb al
lea ecx, [rdi + 4*rdi]
lea edx, [rsi + 2*rcx]
ret
.Lswitch.table.parse_u32_digit_u32_match:
.long 10
.long 6
.long 0
Interestingly, if you move the u8 -> u32 cast down to the last point possible, it results in the same exact assembly emit:
pub fn parse_u32_digit_u8_if(acc: u32, byte: u8) -> Option<u32> {
let first_invalid = if acc < 429496729 {
10
} else if acc > 429496729 {
0
} else {
6
};
let sub = if acc > 429496729 { byte } else { b'0' };
let v = byte.wrapping_sub(sub);
(v < first_invalid).then_some(acc.wrapping_mul(10).wrapping_add(v as u32))
}
pub fn parse_u32_digit_u8_match(acc: u32, byte: u8) -> Option<u32> {
let first_invalid = match acc.cmp(&429496729) {
std::cmp::Ordering::Less => 10,
std::cmp::Ordering::Equal => 6,
std::cmp::Ordering::Greater => 0,
};
let sub = if acc > 429496729 { byte } else { b'0' };
let v = byte.wrapping_sub(sub);
(v < first_invalid).then_some(acc.wrapping_mul(10).wrapping_add(v as u32))
}
parse_u32_digit_u8_if:
xor eax, eax
cmp edi, 429496729
mov ecx, 6
cmovne ecx, eax
mov edx, 10
cmovae edx, ecx
add sil, -48
cmp edi, 429496730
movzx ecx, sil
cmovae ecx, eax
xor eax, eax
cmp cl, dl
setb al
lea edx, [rdi + 4*rdi]
movzx ecx, cl
lea edx, [rcx + 2*rdx]
ret
Meta
rustc --version --verbose
:
rustc 1.79.0 (129f3b996 2024-06-10)
binary: rustc
commit-hash: 129f3b9964af4d4a709d1383930ade12dfe7c081
commit-date: 2024-06-10
host: x86_64-unknown-linux-gnu
release: 1.79.0
LLVM version: 18.1.7
Compiler returned: 0
Metadata
Metadata
Assignees
Labels
Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.Area: Code generationCategory: An issue highlighting optimization opportunities or PRs implementing suchIssue: Problems and improvements with respect to binary size of generated code.Relevant to the compiler team, which will review and decide on the PR/issue.