From 2f5da0e1cf08aa02c7901b988872f464bff4b735 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Mon, 14 Sep 2020 06:42:50 -0700 Subject: [PATCH 1/4] Optimize std_detect's cache --- crates/std_detect/src/detect/cache.rs | 71 +++++++++++++++++---------- crates/std_detect/src/detect/mod.rs | 2 +- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/crates/std_detect/src/detect/cache.rs b/crates/std_detect/src/detect/cache.rs index 6bcbace4bb..bc2104e9fd 100644 --- a/crates/std_detect/src/detect/cache.rs +++ b/crates/std_detect/src/detect/cache.rs @@ -43,7 +43,6 @@ impl Default for Initializer { // the one fitting our cache. impl Initializer { /// Tests the `bit` of the cache. - #[allow(dead_code)] #[inline] pub(crate) fn test(self, bit: u32) -> bool { debug_assert!( @@ -85,10 +84,9 @@ static CACHE: [Cache; 2] = [Cache::uninitialized(), Cache::uninitialized()]; /// Note: the last feature bit is used to represent an /// uninitialized cache. /// -/// Note: we can use `Relaxed` atomic operations, because we are only interested -/// in the effects of operations on a single memory location. That is, we only -/// need "modification order", and not the full-blown "happens before". However, -/// we use `SeqCst` just to be on the safe side. +/// Note: we use `Relaxed` atomic operations, because we are only interested in +/// the effects of operations on a single memory location. That is, we only need +/// "modification order", and not the full-blown "happens before". struct Cache(AtomicUsize); impl Cache { @@ -100,28 +98,38 @@ impl Cache { const fn uninitialized() -> Self { Cache(AtomicUsize::new(usize::MAX)) } - /// Is the cache uninitialized? - #[inline] - pub(crate) fn is_uninitialized(&self) -> bool { - self.0.load(Ordering::SeqCst) == usize::MAX - } - /// Is the `bit` in the cache set? + /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized. #[inline] - pub(crate) fn test(&self, bit: u32) -> bool { - test_bit(self.0.load(Ordering::SeqCst) as u64, bit) + pub(crate) fn test(&self, bit: u32) -> Option { + let cached = self.0.load(Ordering::Relaxed); + if cached == usize::MAX { + None + } else { + Some(test_bit(cached as u64, bit)) + } } /// Initializes the cache. #[inline] - fn initialize(&self, value: usize) { + fn initialize(&self, value: usize) -> usize { + // Use `SeqCst` store to ensure a thread which sees one entry in `CACHE` + // initialized will definitely see the other. While it likely never can + // happen, this avoids something like: + // + // ``` + // is_foo_feature_detected!("bar") && is_foo_feature_detected!("baz") + // ``` + // + // needing to initialize the cache twice. self.0.store(value, Ordering::SeqCst); + value } } cfg_if::cfg_if! { if #[cfg(feature = "std_detect_env_override")] { - #[inline(never)] + #[inline] fn initialize(mut value: Initializer) { if let Ok(disable) = crate::env::var("RUST_STD_DETECT_UNSTABLE") { for v in disable.split(" ") { @@ -144,8 +152,24 @@ fn do_initialize(value: Initializer) { CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); } +// We only have to detect features once, and it's fairly costly, so hint to LLVM +// that it should assume that cache hits are more common than misses (which is +// the point of caching). It's possibly unfortunate that this function needs to +// reach across modules like this to call `os::detect_features`, but it produces +// the best code out of several attempted variants. +// +// The `Initializer` that the cache was initialized with is returned, so that +// the caller can call `test()` on it without having to load the value from the +// cache again. +#[cold] +fn detect_and_initialize() -> Initializer { + let result = super::os::detect_features(); + initialize(result); + result +} + /// Tests the `bit` of the storage. If the storage has not been initialized, -/// initializes it with the result of `f()`. +/// initializes it with the result of `os::detect_features()`. /// /// On its first invocation, it detects the CPU features and caches them in the /// `CACHE` global variable as an `AtomicU64`. @@ -157,18 +181,13 @@ fn do_initialize(value: Initializer) { /// variable `RUST_STD_DETECT_UNSTABLE` and uses its its content to disable /// Features that would had been otherwise detected. #[inline] -pub(crate) fn test(bit: u32, f: F) -> bool -where - F: FnOnce() -> Initializer, -{ - let (bit, idx) = if bit < Cache::CAPACITY { +pub(crate) fn test(bit: u32) -> bool { + let (relative_bit, idx) = if bit < Cache::CAPACITY { (bit, 0) } else { (bit - Cache::CAPACITY, 1) }; - - if CACHE[idx].is_uninitialized() { - initialize(f()) - } - CACHE[idx].test(bit) + CACHE[idx] + .test(relative_bit) + .unwrap_or_else(|| detect_and_initialize().test(bit)) } diff --git a/crates/std_detect/src/detect/mod.rs b/crates/std_detect/src/detect/mod.rs index c44f44c1b3..7aedef47d6 100644 --- a/crates/std_detect/src/detect/mod.rs +++ b/crates/std_detect/src/detect/mod.rs @@ -120,7 +120,7 @@ cfg_if! { #[inline] #[allow(dead_code)] fn check_for(x: Feature) -> bool { - cache::test(x as u32, self::os::detect_features) + cache::test(x as u32) } /// Returns an `Iterator` where From d3cbfa5c941ccdedf6d7d58313e3995c824aa5e0 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Mon, 14 Sep 2020 08:27:06 -0700 Subject: [PATCH 2/4] Have `cache::initialize` return the Initializer This allows modifications made to it, such as env overrides, to be handled properly (immediately). --- crates/std_detect/src/detect/cache.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/std_detect/src/detect/cache.rs b/crates/std_detect/src/detect/cache.rs index bc2104e9fd..78f80cd665 100644 --- a/crates/std_detect/src/detect/cache.rs +++ b/crates/std_detect/src/detect/cache.rs @@ -130,18 +130,20 @@ impl Cache { cfg_if::cfg_if! { if #[cfg(feature = "std_detect_env_override")] { #[inline] - fn initialize(mut value: Initializer) { + fn initialize(mut value: Initializer) -> Initializer { if let Ok(disable) = crate::env::var("RUST_STD_DETECT_UNSTABLE") { for v in disable.split(" ") { let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32)); } } do_initialize(value); + value } } else { #[inline] - fn initialize(value: Initializer) { + fn initialize(value: Initializer) -> Initializer { do_initialize(value); + value } } } @@ -163,9 +165,7 @@ fn do_initialize(value: Initializer) { // cache again. #[cold] fn detect_and_initialize() -> Initializer { - let result = super::os::detect_features(); - initialize(result); - result + initialize(super::os::detect_features()) } /// Tests the `bit` of the storage. If the storage has not been initialized, From 4a63d66e05721a92de7f7b2d28fef102471a1ee3 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Wed, 16 Sep 2020 16:33:07 -0700 Subject: [PATCH 3/4] Use 0 for uninitialized, and Relaxed for stores. --- crates/std_detect/src/detect/cache.rs | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/crates/std_detect/src/detect/cache.rs b/crates/std_detect/src/detect/cache.rs index 78f80cd665..ddb1f224a7 100644 --- a/crates/std_detect/src/detect/cache.rs +++ b/crates/std_detect/src/detect/cache.rs @@ -79,10 +79,10 @@ impl Initializer { // Note: on x64, we only use the first slot static CACHE: [Cache; 2] = [Cache::uninitialized(), Cache::uninitialized()]; -/// Feature cache with capacity for `usize::MAX - 1` features. +/// Feature cache with capacity for `size_of::() * 8 - 1` features. /// -/// Note: the last feature bit is used to represent an -/// uninitialized cache. +/// Note: 0 is used to represent an uninitialized cache, and (at least) the most +/// significant bit is set on any cache which has been initialized. /// /// Note: we use `Relaxed` atomic operations, because we are only interested in /// the effects of operations on a single memory location. That is, we only need @@ -92,18 +92,19 @@ struct Cache(AtomicUsize); impl Cache { const CAPACITY: u32 = (core::mem::size_of::() * 8 - 1) as u32; const MASK: usize = (1 << Cache::CAPACITY) - 1; + const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY; /// Creates an uninitialized cache. #[allow(clippy::declare_interior_mutable_const)] const fn uninitialized() -> Self { - Cache(AtomicUsize::new(usize::MAX)) + Cache(AtomicUsize::new(0)) } /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized. #[inline] pub(crate) fn test(&self, bit: u32) -> Option { let cached = self.0.load(Ordering::Relaxed); - if cached == usize::MAX { + if cached == 0 { None } else { Some(test_bit(cached as u64, bit)) @@ -113,16 +114,8 @@ impl Cache { /// Initializes the cache. #[inline] fn initialize(&self, value: usize) -> usize { - // Use `SeqCst` store to ensure a thread which sees one entry in `CACHE` - // initialized will definitely see the other. While it likely never can - // happen, this avoids something like: - // - // ``` - // is_foo_feature_detected!("bar") && is_foo_feature_detected!("baz") - // ``` - // - // needing to initialize the cache twice. - self.0.store(value, Ordering::SeqCst); + debug_assert_eq!((value & !Cache::MASK), 0); + self.0.store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); value } } From 27efa9fe114af7c29fc03056f57b15487c4f5325 Mon Sep 17 00:00:00 2001 From: Thom Chiovoloni Date: Wed, 16 Sep 2020 16:35:14 -0700 Subject: [PATCH 4/4] Ah, whoops, cargo fmt --- crates/std_detect/src/detect/cache.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crates/std_detect/src/detect/cache.rs b/crates/std_detect/src/detect/cache.rs index ddb1f224a7..e79c96dafa 100644 --- a/crates/std_detect/src/detect/cache.rs +++ b/crates/std_detect/src/detect/cache.rs @@ -115,7 +115,8 @@ impl Cache { #[inline] fn initialize(&self, value: usize) -> usize { debug_assert_eq!((value & !Cache::MASK), 0); - self.0.store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); + self.0 + .store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); value } }