Skip to content

Commit 291395d

Browse files
authored
Optimize std_detect's caching (#908)
1 parent 031cd50 commit 291395d

File tree

2 files changed

+47
-34
lines changed

2 files changed

+47
-34
lines changed

crates/std_detect/src/detect/cache.rs

Lines changed: 46 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ impl Default for Initializer {
4343
// the one fitting our cache.
4444
impl Initializer {
4545
/// Tests the `bit` of the cache.
46-
#[allow(dead_code)]
4746
#[inline]
4847
pub(crate) fn test(self, bit: u32) -> bool {
4948
debug_assert!(
@@ -80,60 +79,65 @@ impl Initializer {
8079
// Note: on x64, we only use the first slot
8180
static CACHE: [Cache; 2] = [Cache::uninitialized(), Cache::uninitialized()];
8281

83-
/// Feature cache with capacity for `usize::MAX - 1` features.
82+
/// Feature cache with capacity for `size_of::<usize::MAX>() * 8 - 1` features.
8483
///
85-
/// Note: the last feature bit is used to represent an
86-
/// uninitialized cache.
84+
/// Note: 0 is used to represent an uninitialized cache, and (at least) the most
85+
/// significant bit is set on any cache which has been initialized.
8786
///
88-
/// Note: we can use `Relaxed` atomic operations, because we are only interested
89-
/// in the effects of operations on a single memory location. That is, we only
90-
/// need "modification order", and not the full-blown "happens before". However,
91-
/// we use `SeqCst` just to be on the safe side.
87+
/// Note: we use `Relaxed` atomic operations, because we are only interested in
88+
/// the effects of operations on a single memory location. That is, we only need
89+
/// "modification order", and not the full-blown "happens before".
9290
struct Cache(AtomicUsize);
9391

9492
impl Cache {
9593
const CAPACITY: u32 = (core::mem::size_of::<usize>() * 8 - 1) as u32;
9694
const MASK: usize = (1 << Cache::CAPACITY) - 1;
95+
const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY;
9796

9897
/// Creates an uninitialized cache.
9998
#[allow(clippy::declare_interior_mutable_const)]
10099
const fn uninitialized() -> Self {
101-
Cache(AtomicUsize::new(usize::MAX))
102-
}
103-
/// Is the cache uninitialized?
104-
#[inline]
105-
pub(crate) fn is_uninitialized(&self) -> bool {
106-
self.0.load(Ordering::SeqCst) == usize::MAX
100+
Cache(AtomicUsize::new(0))
107101
}
108102

109-
/// Is the `bit` in the cache set?
103+
/// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized.
110104
#[inline]
111-
pub(crate) fn test(&self, bit: u32) -> bool {
112-
test_bit(self.0.load(Ordering::SeqCst) as u64, bit)
105+
pub(crate) fn test(&self, bit: u32) -> Option<bool> {
106+
let cached = self.0.load(Ordering::Relaxed);
107+
if cached == 0 {
108+
None
109+
} else {
110+
Some(test_bit(cached as u64, bit))
111+
}
113112
}
114113

115114
/// Initializes the cache.
116115
#[inline]
117-
fn initialize(&self, value: usize) {
118-
self.0.store(value, Ordering::SeqCst);
116+
fn initialize(&self, value: usize) -> usize {
117+
debug_assert_eq!((value & !Cache::MASK), 0);
118+
self.0
119+
.store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed);
120+
value
119121
}
120122
}
121123

122124
cfg_if::cfg_if! {
123125
if #[cfg(feature = "std_detect_env_override")] {
124-
#[inline(never)]
125-
fn initialize(mut value: Initializer) {
126+
#[inline]
127+
fn initialize(mut value: Initializer) -> Initializer {
126128
if let Ok(disable) = crate::env::var("RUST_STD_DETECT_UNSTABLE") {
127129
for v in disable.split(" ") {
128130
let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32));
129131
}
130132
}
131133
do_initialize(value);
134+
value
132135
}
133136
} else {
134137
#[inline]
135-
fn initialize(value: Initializer) {
138+
fn initialize(value: Initializer) -> Initializer {
136139
do_initialize(value);
140+
value
137141
}
138142
}
139143
}
@@ -144,8 +148,22 @@ fn do_initialize(value: Initializer) {
144148
CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK);
145149
}
146150

151+
// We only have to detect features once, and it's fairly costly, so hint to LLVM
152+
// that it should assume that cache hits are more common than misses (which is
153+
// the point of caching). It's possibly unfortunate that this function needs to
154+
// reach across modules like this to call `os::detect_features`, but it produces
155+
// the best code out of several attempted variants.
156+
//
157+
// The `Initializer` that the cache was initialized with is returned, so that
158+
// the caller can call `test()` on it without having to load the value from the
159+
// cache again.
160+
#[cold]
161+
fn detect_and_initialize() -> Initializer {
162+
initialize(super::os::detect_features())
163+
}
164+
147165
/// Tests the `bit` of the storage. If the storage has not been initialized,
148-
/// initializes it with the result of `f()`.
166+
/// initializes it with the result of `os::detect_features()`.
149167
///
150168
/// On its first invocation, it detects the CPU features and caches them in the
151169
/// `CACHE` global variable as an `AtomicU64`.
@@ -157,18 +175,13 @@ fn do_initialize(value: Initializer) {
157175
/// variable `RUST_STD_DETECT_UNSTABLE` and uses its its content to disable
158176
/// Features that would had been otherwise detected.
159177
#[inline]
160-
pub(crate) fn test<F>(bit: u32, f: F) -> bool
161-
where
162-
F: FnOnce() -> Initializer,
163-
{
164-
let (bit, idx) = if bit < Cache::CAPACITY {
178+
pub(crate) fn test(bit: u32) -> bool {
179+
let (relative_bit, idx) = if bit < Cache::CAPACITY {
165180
(bit, 0)
166181
} else {
167182
(bit - Cache::CAPACITY, 1)
168183
};
169-
170-
if CACHE[idx].is_uninitialized() {
171-
initialize(f())
172-
}
173-
CACHE[idx].test(bit)
184+
CACHE[idx]
185+
.test(relative_bit)
186+
.unwrap_or_else(|| detect_and_initialize().test(bit))
174187
}

crates/std_detect/src/detect/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ cfg_if! {
120120
#[inline]
121121
#[allow(dead_code)]
122122
fn check_for(x: Feature) -> bool {
123-
cache::test(x as u32, self::os::detect_features)
123+
cache::test(x as u32)
124124
}
125125

126126
/// Returns an `Iterator<Item=(&'static str, bool)>` where

0 commit comments

Comments
 (0)