@@ -43,7 +43,6 @@ impl Default for Initializer {
43
43
// the one fitting our cache.
44
44
impl Initializer {
45
45
/// Tests the `bit` of the cache.
46
- #[ allow( dead_code) ]
47
46
#[ inline]
48
47
pub ( crate ) fn test ( self , bit : u32 ) -> bool {
49
48
debug_assert ! (
@@ -80,60 +79,65 @@ impl Initializer {
80
79
// Note: on x64, we only use the first slot
81
80
static CACHE : [ Cache ; 2 ] = [ Cache :: uninitialized ( ) , Cache :: uninitialized ( ) ] ;
82
81
83
- /// Feature cache with capacity for `usize::MAX - 1` features.
82
+ /// Feature cache with capacity for `size_of::< usize::MAX>() * 8 - 1` features.
84
83
///
85
- /// Note: the last feature bit is used to represent an
86
- /// uninitialized cache.
84
+ /// Note: 0 is used to represent an uninitialized cache, and (at least) the most
85
+ /// significant bit is set on any cache which has been initialized .
87
86
///
88
- /// Note: we can use `Relaxed` atomic operations, because we are only interested
89
- /// in the effects of operations on a single memory location. That is, we only
90
- /// need "modification order", and not the full-blown "happens before". However,
91
- /// we use `SeqCst` just to be on the safe side.
87
+ /// Note: we use `Relaxed` atomic operations, because we are only interested in
88
+ /// the effects of operations on a single memory location. That is, we only need
89
+ /// "modification order", and not the full-blown "happens before".
92
90
struct Cache ( AtomicUsize ) ;
93
91
94
92
impl Cache {
95
93
const CAPACITY : u32 = ( core:: mem:: size_of :: < usize > ( ) * 8 - 1 ) as u32 ;
96
94
const MASK : usize = ( 1 << Cache :: CAPACITY ) - 1 ;
95
+ const INITIALIZED_BIT : usize = 1usize << Cache :: CAPACITY ;
97
96
98
97
/// Creates an uninitialized cache.
99
98
#[ allow( clippy:: declare_interior_mutable_const) ]
100
99
const fn uninitialized ( ) -> Self {
101
- Cache ( AtomicUsize :: new ( usize:: MAX ) )
102
- }
103
- /// Is the cache uninitialized?
104
- #[ inline]
105
- pub ( crate ) fn is_uninitialized ( & self ) -> bool {
106
- self . 0 . load ( Ordering :: SeqCst ) == usize:: MAX
100
+ Cache ( AtomicUsize :: new ( 0 ) )
107
101
}
108
102
109
- /// Is the `bit` in the cache set?
103
+ /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized.
110
104
#[ inline]
111
- pub ( crate ) fn test ( & self , bit : u32 ) -> bool {
112
- test_bit ( self . 0 . load ( Ordering :: SeqCst ) as u64 , bit)
105
+ pub ( crate ) fn test ( & self , bit : u32 ) -> Option < bool > {
106
+ let cached = self . 0 . load ( Ordering :: Relaxed ) ;
107
+ if cached == 0 {
108
+ None
109
+ } else {
110
+ Some ( test_bit ( cached as u64 , bit) )
111
+ }
113
112
}
114
113
115
114
/// Initializes the cache.
116
115
#[ inline]
117
- fn initialize ( & self , value : usize ) {
118
- self . 0 . store ( value, Ordering :: SeqCst ) ;
116
+ fn initialize ( & self , value : usize ) -> usize {
117
+ debug_assert_eq ! ( ( value & !Cache :: MASK ) , 0 ) ;
118
+ self . 0
119
+ . store ( value | Cache :: INITIALIZED_BIT , Ordering :: Relaxed ) ;
120
+ value
119
121
}
120
122
}
121
123
122
124
cfg_if:: cfg_if! {
123
125
if #[ cfg( feature = "std_detect_env_override" ) ] {
124
- #[ inline( never ) ]
125
- fn initialize( mut value: Initializer ) {
126
+ #[ inline]
127
+ fn initialize( mut value: Initializer ) -> Initializer {
126
128
if let Ok ( disable) = crate :: env:: var( "RUST_STD_DETECT_UNSTABLE" ) {
127
129
for v in disable. split( " " ) {
128
130
let _ = super :: Feature :: from_str( v) . map( |v| value. unset( v as u32 ) ) ;
129
131
}
130
132
}
131
133
do_initialize( value) ;
134
+ value
132
135
}
133
136
} else {
134
137
#[ inline]
135
- fn initialize( value: Initializer ) {
138
+ fn initialize( value: Initializer ) -> Initializer {
136
139
do_initialize( value) ;
140
+ value
137
141
}
138
142
}
139
143
}
@@ -144,8 +148,22 @@ fn do_initialize(value: Initializer) {
144
148
CACHE [ 1 ] . initialize ( ( value. 0 >> Cache :: CAPACITY ) as usize & Cache :: MASK ) ;
145
149
}
146
150
151
+ // We only have to detect features once, and it's fairly costly, so hint to LLVM
152
+ // that it should assume that cache hits are more common than misses (which is
153
+ // the point of caching). It's possibly unfortunate that this function needs to
154
+ // reach across modules like this to call `os::detect_features`, but it produces
155
+ // the best code out of several attempted variants.
156
+ //
157
+ // The `Initializer` that the cache was initialized with is returned, so that
158
+ // the caller can call `test()` on it without having to load the value from the
159
+ // cache again.
160
+ #[ cold]
161
+ fn detect_and_initialize ( ) -> Initializer {
162
+ initialize ( super :: os:: detect_features ( ) )
163
+ }
164
+
147
165
/// Tests the `bit` of the storage. If the storage has not been initialized,
148
- /// initializes it with the result of `f ()`.
166
+ /// initializes it with the result of `os::detect_features ()`.
149
167
///
150
168
/// On its first invocation, it detects the CPU features and caches them in the
151
169
/// `CACHE` global variable as an `AtomicU64`.
@@ -157,18 +175,13 @@ fn do_initialize(value: Initializer) {
157
175
/// variable `RUST_STD_DETECT_UNSTABLE` and uses its its content to disable
158
176
/// Features that would had been otherwise detected.
159
177
#[ inline]
160
- pub ( crate ) fn test < F > ( bit : u32 , f : F ) -> bool
161
- where
162
- F : FnOnce ( ) -> Initializer ,
163
- {
164
- let ( bit, idx) = if bit < Cache :: CAPACITY {
178
+ pub ( crate ) fn test ( bit : u32 ) -> bool {
179
+ let ( relative_bit, idx) = if bit < Cache :: CAPACITY {
165
180
( bit, 0 )
166
181
} else {
167
182
( bit - Cache :: CAPACITY , 1 )
168
183
} ;
169
-
170
- if CACHE [ idx] . is_uninitialized ( ) {
171
- initialize ( f ( ) )
172
- }
173
- CACHE [ idx] . test ( bit)
184
+ CACHE [ idx]
185
+ . test ( relative_bit)
186
+ . unwrap_or_else ( || detect_and_initialize ( ) . test ( bit) )
174
187
}
0 commit comments