Commit d7d35af

Rollup merge of #76971 - bugadani:issue-75659, r=Amanieu
Refactor memchr to allow optimization

Closes #75659

The implementation already uses a naive search if the slice is short enough, but that case is complicated enough that it is not optimized away. This PR refactors memchr so that it exits early when the slice is short enough. Codegen-wise, as shown in #75659, memchr was not inlined previously, so the only way I could find to test this is to check that there is no memchr call. Let me know if there is a more robust solution here.
2 parents 88b2577 + d023436 commit d7d35af
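
A condensed, runnable sketch of the new shape of memchr described above (the real code is in the library/core/src/slice/memchr.rs diff below; memchr_general_case here is a stand-in that uses a plain scan rather than the word-at-a-time search):

#[inline]
pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
    // Fast path: slices shorter than two words use the naive scan. Because this
    // branch is inlined, the optimizer can fold the whole call away when the
    // length is a small compile-time constant (e.g. `[u8; 4]::contains`).
    if text.len() < 2 * core::mem::size_of::<usize>() {
        return text.iter().position(|elt| *elt == x);
    }
    memchr_general_case(x, text)
}

// Kept out of line so the inlined fast path stays small in every caller.
#[inline(never)]
fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
    // Stand-in for the aligned, two-words-at-a-time scan shown in the diff.
    text.iter().position(|elt| *elt == x)
}

fn main() {
    assert_eq!(memchr(b'c', b"abcdef"), Some(2));
    assert_eq!(memchr(b'z', b"abcdef"), None);
}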

File tree

4 files changed: +90 -16 lines

library/core/src/slice/cmp.rs

Lines changed: 2 additions & 0 deletions

@@ -268,12 +268,14 @@ where
 }
 
 impl SliceContains for u8 {
+    #[inline]
     fn slice_contains(&self, x: &[Self]) -> bool {
         memchr::memchr(*self, x).is_some()
     }
 }
 
 impl SliceContains for i8 {
+    #[inline]
     fn slice_contains(&self, x: &[Self]) -> bool {
         let byte = *self as u8;
         // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()`
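
The i8 impl above is cut off at its SAFETY comment; a self-contained sketch of the delegation it performs (the free-function name is illustrative, and std's from_raw_parts stands in for the core-internal one):

fn i8_slice_contains(needle: i8, haystack: &[i8]) -> bool {
    let byte = needle as u8;
    // SAFETY: `i8` and `u8` have the same size and memory layout, and the
    // pointer/length pair comes from a valid `&[i8]`, so viewing it as `&[u8]`
    // is sound; the slice is never mutated through this view.
    let bytes: &[u8] =
        unsafe { std::slice::from_raw_parts(haystack.as_ptr() as *const u8, haystack.len()) };
    // The `u8` search (and thus `memchr`) does the actual work.
    bytes.contains(&byte)
}

fn main() {
    assert!(i8_slice_contains(-1, &[3, -1, 7]));
    assert!(!i8_slice_contains(5, &[3, -1, 7]));
}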

library/core/src/slice/memchr.rs

Lines changed: 25 additions & 16 deletions

@@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080;
 // Use truncation.
 const LO_USIZE: usize = LO_U64 as usize;
 const HI_USIZE: usize = HI_U64 as usize;
+const USIZE_BYTES: usize = mem::size_of::<usize>();
 
 /// Returns `true` if `x` contains any zero byte.
 ///
@@ -38,19 +39,30 @@ fn repeat_byte(b: u8) -> usize {
 }
 
 /// Returns the first index matching the byte `x` in `text`.
+#[inline]
 pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
+    // Fast path for small slices
+    if text.len() < 2 * USIZE_BYTES {
+        return text.iter().position(|elt| *elt == x);
+    }
+
+    memchr_general_case(x, text)
+}
+
+#[inline(never)]
+fn memchr_general_case(x: u8, text: &[u8]) -> Option<usize> {
     // Scan for a single byte value by reading two `usize` words at a time.
     //
     // Split `text` in three parts
     // - unaligned initial part, before the first word aligned address in text
     // - body, scan by 2 words at a time
     // - the last remaining part, < 2 word size
+
+    // search up to an aligned boundary
     let len = text.len();
     let ptr = text.as_ptr();
-    let usize_bytes = mem::size_of::<usize>();
+    let mut offset = ptr.align_offset(USIZE_BYTES);
 
-    // search up to an aligned boundary
-    let mut offset = ptr.align_offset(usize_bytes);
     if offset > 0 {
         offset = cmp::min(offset, len);
         if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
@@ -60,22 +72,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
 
     // search the body of the text
     let repeated_x = repeat_byte(x);
+    while offset <= len - 2 * USIZE_BYTES {
+        unsafe {
+            let u = *(ptr.add(offset) as *const usize);
+            let v = *(ptr.add(offset + USIZE_BYTES) as *const usize);
 
-    if len >= 2 * usize_bytes {
-        while offset <= len - 2 * usize_bytes {
-            unsafe {
-                let u = *(ptr.add(offset) as *const usize);
-                let v = *(ptr.add(offset + usize_bytes) as *const usize);
-
-                // break if there is a matching byte
-                let zu = contains_zero_byte(u ^ repeated_x);
-                let zv = contains_zero_byte(v ^ repeated_x);
-                if zu || zv {
-                    break;
-                }
+            // break if there is a matching byte
+            let zu = contains_zero_byte(u ^ repeated_x);
+            let zv = contains_zero_byte(v ^ repeated_x);
+            if zu || zv {
+                break;
             }
-            offset += usize_bytes * 2;
         }
+        offset += USIZE_BYTES * 2;
     }
 
     // Find the byte after the point the body loop stopped.
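
For context on the body loop: repeat_byte and contains_zero_byte implement the classic word-at-a-time byte search. A self-contained sketch of the trick (constants mirror HI_U64 from the hunk header and its LO_U64 counterpart; the function names here are illustrative, not the library internals):

const LO: u64 = 0x0101_0101_0101_0101; // LO_U64 in the file above
const HI: u64 = 0x8080_8080_8080_8080; // HI_U64 in the file above

// True iff any byte of `v` is zero: the classic `(v - LO) & !v & HI` test.
fn has_zero_byte(v: u64) -> bool {
    v.wrapping_sub(LO) & !v & HI != 0
}

// True iff any byte of `word` equals `x`: XOR with the search byte repeated in
// every lane (what `repeat_byte` produces) turns "contains x" into "contains 0".
fn word_contains_byte(word: u64, x: u8) -> bool {
    let repeated = LO * u64::from(x);
    has_zero_byte(word ^ repeated)
}

fn main() {
    let word = u64::from_ne_bytes(*b"abcdefgh");
    assert!(word_contains_byte(word, b'e'));
    assert!(!word_contains_byte(word, b'z'));
}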

library/core/src/slice/mod.rs

Lines changed: 1 addition & 0 deletions

@@ -1636,6 +1636,7 @@ impl<T> [T] {
     /// assert!(!v.iter().any(|e| e == "hi"));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
+    #[inline]
     pub fn contains(&self, x: &T) -> bool
     where
         T: PartialEq,

src/test/codegen/issue-75659.rs

Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+// This test checks that the call to memchr/slice_contains is optimized away
+// when searching in small slices.
+
+// compile-flags: -O
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @foo1
+#[no_mangle]
+pub fn foo1(x: u8, data: &[u8; 1]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo2
+#[no_mangle]
+pub fn foo2(x: u8, data: &[u8; 2]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo3
+#[no_mangle]
+pub fn foo3(x: u8, data: &[u8; 3]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo4
+#[no_mangle]
+pub fn foo4(x: u8, data: &[u8; 4]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8
+#[no_mangle]
+pub fn foo8(x: u8, data: &[u8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// CHECK-LABEL: @foo8_i8
+#[no_mangle]
+pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool {
+    // CHECK-NOT: memchr
+    // CHECK-NOT: slice_contains
+    data.contains(&x)
+}
+
+// Check that the general case isn't inlined
+// CHECK-LABEL: @foo80
+#[no_mangle]
+pub fn foo80(x: u8, data: &[u8; 80]) -> bool {
+    // CHECK: call core::slice::memchr
+    data.contains(&x)
+}
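
In the rust-lang/rust tree, a FileCheck-based codegen test like this one is run through compiletest, typically with something along the lines of ./x.py test src/test/codegen --test-args issue-75659 (the exact invocation depends on the checkout).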
