From 48e3da6d5910d119d4afefd7d06340390db6968d Mon Sep 17 00:00:00 2001 From: tyler Date: Sat, 27 Apr 2019 07:23:31 -0700 Subject: [PATCH 01/15] remove dead code: requires_move_before_drop --- src/libstd/sys/redox/fast_thread_local.rs | 4 ---- src/libstd/sys/unix/fast_thread_local.rs | 4 ---- src/libstd/sys/windows/fast_thread_local.rs | 4 ---- src/libstd/thread/local.rs | 15 +++------------ 4 files changed, 3 insertions(+), 24 deletions(-) diff --git a/src/libstd/sys/redox/fast_thread_local.rs b/src/libstd/sys/redox/fast_thread_local.rs index 1202708a4769a..fd278bfe39ac8 100644 --- a/src/libstd/sys/redox/fast_thread_local.rs +++ b/src/libstd/sys/redox/fast_thread_local.rs @@ -105,7 +105,3 @@ pub unsafe extern fn destroy_value(ptr: *mut u8) { ptr::drop_in_place((*ptr).inner.get()); } } - -pub fn requires_move_before_drop() -> bool { - false -} diff --git a/src/libstd/sys/unix/fast_thread_local.rs b/src/libstd/sys/unix/fast_thread_local.rs index 17478dce4fe51..c34c2e6e786ec 100644 --- a/src/libstd/sys/unix/fast_thread_local.rs +++ b/src/libstd/sys/unix/fast_thread_local.rs @@ -82,7 +82,3 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern fn(*mut u8)) { } } } - -pub fn requires_move_before_drop() -> bool { - false -} diff --git a/src/libstd/sys/windows/fast_thread_local.rs b/src/libstd/sys/windows/fast_thread_local.rs index 0ccc67e3fd54e..31d0bd1e72ed2 100644 --- a/src/libstd/sys/windows/fast_thread_local.rs +++ b/src/libstd/sys/windows/fast_thread_local.rs @@ -2,7 +2,3 @@ #![cfg(target_thread_local)] pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; - -pub fn requires_move_before_drop() -> bool { - false -} diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index bfc1deddf7bdb..0d5e1f2af38a6 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -344,7 +344,7 @@ pub mod fast { use crate::fmt; use crate::mem; use crate::ptr; - use crate::sys::fast_thread_local::{register_dtor, requires_move_before_drop}; + use crate::sys::fast_thread_local::register_dtor; pub struct Key { inner: UnsafeCell>, @@ -395,17 +395,8 @@ pub mod fast { // destructor as running for this thread so calls to `get` will return // `None`. (*ptr).dtor_running.set(true); - - // Some implementations may require us to move the value before we drop - // it as it could get re-initialized in-place during destruction. - // - // Hence, we use `ptr::read` on those platforms (to move to a "safe" - // location) instead of drop_in_place. - if requires_move_before_drop() { - ptr::read((*ptr).inner.get()); - } else { - ptr::drop_in_place((*ptr).inner.get()); - } + + ptr::drop_in_place((*ptr).inner.get()); } } From ae4be16e407061828fe604b1ccbd61181b14a3f1 Mon Sep 17 00:00:00 2001 From: tyler Date: Sat, 27 Apr 2019 07:38:01 -0700 Subject: [PATCH 02/15] redox had a copy of fast thread local (oversight?) --- src/libstd/sys/redox/fast_thread_local.rs | 105 +--------------------- 1 file changed, 1 insertion(+), 104 deletions(-) diff --git a/src/libstd/sys/redox/fast_thread_local.rs b/src/libstd/sys/redox/fast_thread_local.rs index fd278bfe39ac8..67b92d490b234 100644 --- a/src/libstd/sys/redox/fast_thread_local.rs +++ b/src/libstd/sys/redox/fast_thread_local.rs @@ -1,107 +1,4 @@ #![cfg(target_thread_local)] #![unstable(feature = "thread_local_internals", issue = "0")] -use crate::cell::{Cell, UnsafeCell}; -use crate::mem; -use crate::ptr; - - -pub struct Key { - inner: UnsafeCell>, - - // Metadata to keep track of the state of the destructor. Remember that - // these variables are thread-local, not global. - dtor_registered: Cell, - dtor_running: Cell, -} - -unsafe impl Sync for Key { } - -impl Key { - pub const fn new() -> Key { - Key { - inner: UnsafeCell::new(None), - dtor_registered: Cell::new(false), - dtor_running: Cell::new(false) - } - } - - pub fn get(&'static self) -> Option<&'static UnsafeCell>> { - unsafe { - if mem::needs_drop::() && self.dtor_running.get() { - return None - } - self.register_dtor(); - } - Some(&self.inner) - } - - unsafe fn register_dtor(&self) { - if !mem::needs_drop::() || self.dtor_registered.get() { - return - } - - register_dtor(self as *const _ as *mut u8, - destroy_value::); - self.dtor_registered.set(true); - } -} - -pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern fn(*mut u8)) { - // The fallback implementation uses a vanilla OS-based TLS key to track - // the list of destructors that need to be run for this thread. The key - // then has its own destructor which runs all the other destructors. - // - // The destructor for DTORS is a little special in that it has a `while` - // loop to continuously drain the list of registered destructors. It - // *should* be the case that this loop always terminates because we - // provide the guarantee that a TLS key cannot be set after it is - // flagged for destruction. - use crate::sys_common::thread_local as os; - - static DTORS: os::StaticKey = os::StaticKey::new(Some(run_dtors)); - type List = Vec<(*mut u8, unsafe extern fn(*mut u8))>; - if DTORS.get().is_null() { - let v: Box = box Vec::new(); - DTORS.set(Box::into_raw(v) as *mut u8); - } - let list: &mut List = &mut *(DTORS.get() as *mut List); - list.push((t, dtor)); - - unsafe extern fn run_dtors(mut ptr: *mut u8) { - while !ptr.is_null() { - let list: Box = Box::from_raw(ptr as *mut List); - for (ptr, dtor) in list.into_iter() { - dtor(ptr); - } - ptr = DTORS.get(); - DTORS.set(ptr::null_mut()); - } - } -} - -pub unsafe extern fn destroy_value(ptr: *mut u8) { - let ptr = ptr as *mut Key; - // Right before we run the user destructor be sure to flag the - // destructor as running for this thread so calls to `get` will return - // `None`. - (*ptr).dtor_running.set(true); - - // The macOS implementation of TLS apparently had an odd aspect to it - // where the pointer we have may be overwritten while this destructor - // is running. Specifically if a TLS destructor re-accesses TLS it may - // trigger a re-initialization of all TLS variables, paving over at - // least some destroyed ones with initial values. - // - // This means that if we drop a TLS value in place on macOS that we could - // revert the value to its original state halfway through the - // destructor, which would be bad! - // - // Hence, we use `ptr::read` on macOS (to move to a "safe" location) - // instead of drop_in_place. - if cfg!(target_os = "macos") { - ptr::read((*ptr).inner.get()); - } else { - ptr::drop_in_place((*ptr).inner.get()); - } -} +pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; \ No newline at end of file From 430a091cd80c0e4b6bf44f6a19463a832e566f97 Mon Sep 17 00:00:00 2001 From: tyler Date: Sat, 27 Apr 2019 08:01:32 -0700 Subject: [PATCH 03/15] ensure fast thread local lookups occur once per access on macos --- src/libstd/sys/redox/fast_thread_local.rs | 6 +++++- src/libstd/sys/unix/fast_thread_local.rs | 17 +++++++++++++++++ src/libstd/sys/windows/fast_thread_local.rs | 4 ++++ src/libstd/thread/local.rs | 11 ++++++----- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/libstd/sys/redox/fast_thread_local.rs b/src/libstd/sys/redox/fast_thread_local.rs index 67b92d490b234..c34cd575db707 100644 --- a/src/libstd/sys/redox/fast_thread_local.rs +++ b/src/libstd/sys/redox/fast_thread_local.rs @@ -1,4 +1,8 @@ #![cfg(target_thread_local)] #![unstable(feature = "thread_local_internals", issue = "0")] -pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; \ No newline at end of file +pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; + +pub unsafe fn lookup_once(ptr: *const &T) -> &T { + *ptr +} diff --git a/src/libstd/sys/unix/fast_thread_local.rs b/src/libstd/sys/unix/fast_thread_local.rs index c34c2e6e786ec..28fb980054105 100644 --- a/src/libstd/sys/unix/fast_thread_local.rs +++ b/src/libstd/sys/unix/fast_thread_local.rs @@ -82,3 +82,20 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern fn(*mut u8)) { } } } + +#[cfg(not(target_os = "macos"))] +pub unsafe fn lookup_once(ptr: *const &T) -> &T { + *ptr +} + +#[cfg(target_os = "macos")] +pub unsafe fn lookup_once(ptr: *const &T) -> &T { + // On macos, thread_local lookups can result in terrible code due to + // aggressive rerunning of the macos equivalent of `__tls_get_addr` - four + // lookups per actual reference in user code. + // + // Using a read_volatile on a value holding fast Key's address tricks the + // optimizer into only calling the macos get_addr equivalent once per time + // requested by the user. + crate::ptr::read_volatile(ptr) +} diff --git a/src/libstd/sys/windows/fast_thread_local.rs b/src/libstd/sys/windows/fast_thread_local.rs index 31d0bd1e72ed2..566d5524fd7c8 100644 --- a/src/libstd/sys/windows/fast_thread_local.rs +++ b/src/libstd/sys/windows/fast_thread_local.rs @@ -2,3 +2,7 @@ #![cfg(target_thread_local)] pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; + +pub unsafe fn lookup_once(ptr: *const &T) -> &T { + *ptr +} diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 0d5e1f2af38a6..9ce2fcf235221 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -344,7 +344,7 @@ pub mod fast { use crate::fmt; use crate::mem; use crate::ptr; - use crate::sys::fast_thread_local::register_dtor; + use crate::sys::fast_thread_local::{lookup_once, register_dtor}; pub struct Key { inner: UnsafeCell>, @@ -371,11 +371,12 @@ pub mod fast { } pub unsafe fn get(&self) -> Option<&'static UnsafeCell>> { - if mem::needs_drop::() && self.dtor_running.get() { + let this = lookup_once(&self); + if mem::needs_drop::() && this.dtor_running.get() { return None } - self.register_dtor(); - Some(&*(&self.inner as *const _)) + this.register_dtor(); + Some(&*(&this.inner as *const _)) } unsafe fn register_dtor(&self) { @@ -395,7 +396,7 @@ pub mod fast { // destructor as running for this thread so calls to `get` will return // `None`. (*ptr).dtor_running.set(true); - + ptr::drop_in_place((*ptr).inner.get()); } } From 7acfb99adc013d4b77c611cfc51bade551205f5a Mon Sep 17 00:00:00 2001 From: tyler Date: Tue, 30 Apr 2019 18:24:38 -0700 Subject: [PATCH 04/15] Revert "ensure fast thread local lookups occur once per access on macos" This reverts commit d252f3b77f3b7d4cd59620588f9d026633c05816. --- src/libstd/sys/redox/fast_thread_local.rs | 6 +----- src/libstd/sys/unix/fast_thread_local.rs | 17 ----------------- src/libstd/sys/windows/fast_thread_local.rs | 4 ---- src/libstd/thread/local.rs | 11 +++++------ 4 files changed, 6 insertions(+), 32 deletions(-) diff --git a/src/libstd/sys/redox/fast_thread_local.rs b/src/libstd/sys/redox/fast_thread_local.rs index c34cd575db707..67b92d490b234 100644 --- a/src/libstd/sys/redox/fast_thread_local.rs +++ b/src/libstd/sys/redox/fast_thread_local.rs @@ -1,8 +1,4 @@ #![cfg(target_thread_local)] #![unstable(feature = "thread_local_internals", issue = "0")] -pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; - -pub unsafe fn lookup_once(ptr: *const &T) -> &T { - *ptr -} +pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; \ No newline at end of file diff --git a/src/libstd/sys/unix/fast_thread_local.rs b/src/libstd/sys/unix/fast_thread_local.rs index 28fb980054105..c34c2e6e786ec 100644 --- a/src/libstd/sys/unix/fast_thread_local.rs +++ b/src/libstd/sys/unix/fast_thread_local.rs @@ -82,20 +82,3 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern fn(*mut u8)) { } } } - -#[cfg(not(target_os = "macos"))] -pub unsafe fn lookup_once(ptr: *const &T) -> &T { - *ptr -} - -#[cfg(target_os = "macos")] -pub unsafe fn lookup_once(ptr: *const &T) -> &T { - // On macos, thread_local lookups can result in terrible code due to - // aggressive rerunning of the macos equivalent of `__tls_get_addr` - four - // lookups per actual reference in user code. - // - // Using a read_volatile on a value holding fast Key's address tricks the - // optimizer into only calling the macos get_addr equivalent once per time - // requested by the user. - crate::ptr::read_volatile(ptr) -} diff --git a/src/libstd/sys/windows/fast_thread_local.rs b/src/libstd/sys/windows/fast_thread_local.rs index 566d5524fd7c8..31d0bd1e72ed2 100644 --- a/src/libstd/sys/windows/fast_thread_local.rs +++ b/src/libstd/sys/windows/fast_thread_local.rs @@ -2,7 +2,3 @@ #![cfg(target_thread_local)] pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; - -pub unsafe fn lookup_once(ptr: *const &T) -> &T { - *ptr -} diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 9ce2fcf235221..0d5e1f2af38a6 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -344,7 +344,7 @@ pub mod fast { use crate::fmt; use crate::mem; use crate::ptr; - use crate::sys::fast_thread_local::{lookup_once, register_dtor}; + use crate::sys::fast_thread_local::register_dtor; pub struct Key { inner: UnsafeCell>, @@ -371,12 +371,11 @@ pub mod fast { } pub unsafe fn get(&self) -> Option<&'static UnsafeCell>> { - let this = lookup_once(&self); - if mem::needs_drop::() && this.dtor_running.get() { + if mem::needs_drop::() && self.dtor_running.get() { return None } - this.register_dtor(); - Some(&*(&this.inner as *const _)) + self.register_dtor(); + Some(&*(&self.inner as *const _)) } unsafe fn register_dtor(&self) { @@ -396,7 +395,7 @@ pub mod fast { // destructor as running for this thread so calls to `get` will return // `None`. (*ptr).dtor_running.set(true); - + ptr::drop_in_place((*ptr).inner.get()); } } From dfe51a7249e04431526cf3b4779316aabef53cca Mon Sep 17 00:00:00 2001 From: tyler Date: Thu, 2 May 2019 22:40:52 -0700 Subject: [PATCH 05/15] restructure thread_local! for better codegen (especially on macos) --- src/libstd/sys/redox/fast_thread_local.rs | 2 +- src/libstd/thread/local.rs | 285 ++++++++++++++-------- src/test/ui/issues/issue-43733.rs | 6 +- src/test/ui/issues/issue-43733.stderr | 8 +- 4 files changed, 195 insertions(+), 106 deletions(-) diff --git a/src/libstd/sys/redox/fast_thread_local.rs b/src/libstd/sys/redox/fast_thread_local.rs index 67b92d490b234..05464787a05d3 100644 --- a/src/libstd/sys/redox/fast_thread_local.rs +++ b/src/libstd/sys/redox/fast_thread_local.rs @@ -1,4 +1,4 @@ #![cfg(target_thread_local)] #![unstable(feature = "thread_local_internals", issue = "0")] -pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; \ No newline at end of file +pub use crate::sys_common::thread_local::register_dtor_fallback as register_dtor; diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 0d5e1f2af38a6..7ff81bd9a1732 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -2,10 +2,7 @@ #![unstable(feature = "thread_local_internals", issue = "0")] -use crate::cell::UnsafeCell; use crate::fmt; -use crate::hint; -use crate::mem; /// A thread local storage key which owns its contents. /// @@ -92,10 +89,7 @@ pub struct LocalKey { // trivially devirtualizable by LLVM because the value of `inner` never // changes and the constant should be readonly within a crate. This mainly // only runs into problems when TLS statics are exported across crates. - inner: unsafe fn() -> Option<&'static UnsafeCell>>, - - // initialization routine to invoke to create a value - init: fn() -> T, + inner: unsafe fn() -> Option<&'static T>, } #[stable(feature = "std_debug", since = "1.16.0")] @@ -159,10 +153,7 @@ macro_rules! __thread_local_inner { #[inline] fn __init() -> $t { $init } - unsafe fn __getit() -> $crate::option::Option< - &'static $crate::cell::UnsafeCell< - $crate::option::Option<$t>>> - { + unsafe fn __getit() -> $crate::option::Option<&'static $t> { #[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))] static __KEY: $crate::thread::__StaticLocalKeyInner<$t> = $crate::thread::__StaticLocalKeyInner::new(); @@ -182,11 +173,11 @@ macro_rules! __thread_local_inner { static __KEY: $crate::thread::__OsLocalKeyInner<$t> = $crate::thread::__OsLocalKeyInner::new(); - __KEY.get() + __KEY.get(__init) } unsafe { - $crate::thread::LocalKey::new(__getit, __init) + $crate::thread::LocalKey::new(__getit) } } }; @@ -221,11 +212,9 @@ impl LocalKey { #[unstable(feature = "thread_local_internals", reason = "recently added to create a key", issue = "0")] - pub const unsafe fn new(inner: unsafe fn() -> Option<&'static UnsafeCell>>, - init: fn() -> T) -> LocalKey { + pub const unsafe fn new(inner: unsafe fn() -> Option<&'static T>) -> LocalKey { LocalKey { inner, - init, } } @@ -246,37 +235,6 @@ impl LocalKey { after it is destroyed") } - unsafe fn init(&self, slot: &UnsafeCell>) -> &T { - // Execute the initialization up front, *then* move it into our slot, - // just in case initialization fails. - let value = (self.init)(); - let ptr = slot.get(); - - // note that this can in theory just be `*ptr = Some(value)`, but due to - // the compiler will currently codegen that pattern with something like: - // - // ptr::drop_in_place(ptr) - // ptr::write(ptr, Some(value)) - // - // Due to this pattern it's possible for the destructor of the value in - // `ptr` (e.g., if this is being recursively initialized) to re-access - // TLS, in which case there will be a `&` and `&mut` pointer to the same - // value (an aliasing violation). To avoid setting the "I'm running a - // destructor" flag we just use `mem::replace` which should sequence the - // operations a little differently and make this safe to call. - mem::replace(&mut *ptr, Some(value)); - - // After storing `Some` we want to get a reference to the contents of - // what we just stored. While we could use `unwrap` here and it should - // always work it empirically doesn't seem to always get optimized away, - // which means that using something like `try_with` can pull in - // panicking code and cause a large size bloat. - match *ptr { - Some(ref x) => x, - None => hint::unreachable_unchecked(), - } - } - /// Acquires a reference to the value in this TLS key. /// /// This will lazily initialize the value if this thread has not referenced @@ -293,13 +251,68 @@ impl LocalKey { F: FnOnce(&T) -> R, { unsafe { - let slot = (self.inner)().ok_or(AccessError { + let thread_local = (self.inner)().ok_or(AccessError { _private: (), })?; - Ok(f(match *slot.get() { - Some(ref inner) => inner, - None => self.init(slot), - })) + Ok(f(thread_local)) + } + } +} + +mod lazy { + use crate::cell::UnsafeCell; + use crate::mem; + use crate::hint; + + pub struct LazyKeyInner { + inner: UnsafeCell>, + } + + impl LazyKeyInner { + pub const fn new() -> LazyKeyInner { + LazyKeyInner { + inner: UnsafeCell::new(None), + } + } + + #[inline] + pub unsafe fn get(&self) -> Option<&'static T> { + (*self.inner.get()).as_ref() + } + + pub unsafe fn initialize T>(&self, init: F) -> &'static T { + // Execute the initialization up front, *then* move it into our slot, + // just in case initialization fails. + let value = init(); + let ptr = self.inner.get(); + + // note that this can in theory just be `*ptr = Some(value)`, but due to + // the compiler will currently codegen that pattern with something like: + // + // ptr::drop_in_place(ptr) + // ptr::write(ptr, Some(value)) + // + // Due to this pattern it's possible for the destructor of the value in + // `ptr` (e.g., if this is being recursively initialized) to re-access + // TLS, in which case there will be a `&` and `&mut` pointer to the same + // value (an aliasing violation). To avoid setting the "I'm running a + // destructor" flag we just use `mem::replace` which should sequence the + // operations a little differently and make this safe to call. + mem::replace(&mut *ptr, Some(value)); + + // After storing `Some` we want to get a reference to the contents of + // what we just stored. While we could use `unwrap` here and it should + // always work it empirically doesn't seem to always get optimized away, + // which means that using something like `try_with` can pull in + // panicking code and cause a large size bloat. + match *ptr { + Some(ref x) => x, + None => hint::unreachable_unchecked(), + } + } + + pub unsafe fn take(&mut self) -> Option { + (*self.inner.get()).take() } } } @@ -309,11 +322,12 @@ impl LocalKey { #[doc(hidden)] #[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))] pub mod statik { + use super::lazy::LazyKeyInner; use crate::cell::UnsafeCell; use crate::fmt; pub struct Key { - inner: UnsafeCell>, + inner: LazyKeyInner, } unsafe impl Sync for Key { } @@ -327,12 +341,17 @@ pub mod statik { impl Key { pub const fn new() -> Key { Key { - inner: UnsafeCell::new(None), + inner: LazyKeyInner::new(), } } - pub unsafe fn get(&self) -> Option<&'static UnsafeCell>> { - Some(&*(&self.inner as *const _)) + #[inline] + pub unsafe fn get(&self, init: fn() -> T) -> Option<&'static T> { + let value = match self.inner.get() { + Some(ref value) => value, + None => self.inner.initialize(init), + }; + Some(value) } } } @@ -340,19 +359,33 @@ pub mod statik { #[doc(hidden)] #[cfg(target_thread_local)] pub mod fast { - use crate::cell::{Cell, UnsafeCell}; + use super::lazy::LazyKeyInner; + use crate::cell::Cell; use crate::fmt; use crate::mem; - use crate::ptr; use crate::sys::fast_thread_local::register_dtor; + #[derive(Copy, Clone)] + enum DtorState { + Unregistered, + Registered, + RunningOrHasRun, + } + pub struct Key { - inner: UnsafeCell>, + // If `LazyKeyInner::get` returns `None`, that indicates either: + // * The value has never been initialized + // * The value is being recursively initialized + // * The value has already been destroyed or is being destroyed + // To determine which kind of `None`, check `dtor_state`. + // + // This is very optimizer friendly for the fast path - initialized but + // not yet dropped. + inner: LazyKeyInner, // Metadata to keep track of the state of the destructor. Remember that - // these variables are thread-local, not global. - dtor_registered: Cell, - dtor_running: Cell, + // this variable is thread-local, not global. + dtor_state: Cell, } impl fmt::Debug for Key { @@ -364,45 +397,84 @@ pub mod fast { impl Key { pub const fn new() -> Key { Key { - inner: UnsafeCell::new(None), - dtor_registered: Cell::new(false), - dtor_running: Cell::new(false) + inner: LazyKeyInner::new(), + dtor_state: Cell::new(DtorState::Unregistered), } } - pub unsafe fn get(&self) -> Option<&'static UnsafeCell>> { - if mem::needs_drop::() && self.dtor_running.get() { - return None + #[inline] + pub unsafe fn get T>(&self, init: F) -> Option<&'static T> { + match self.inner.get() { + Some(val) => Some(val), + None => { + if mem::needs_drop::() { + self.try_initialize_drop(init) + } else { + Some(self.try_initialize_nodrop(init)) + } + } } - self.register_dtor(); - Some(&*(&self.inner as *const _)) } - unsafe fn register_dtor(&self) { - if !mem::needs_drop::() || self.dtor_registered.get() { - return + // `try_initialize_nodrop` is only called once per fast thread local + // variable, except in corner cases where it is being recursively + // initialized. + // + // Macos: Inlining this function causes two `tlv_get_addr` calls to be + // performed for every call to `Key::get`. + // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 + #[inline(never)] + #[cold] + unsafe fn try_initialize_nodrop T>(&self, init: F) -> &'static T { + self.inner.initialize(init) + } + + // `try_initialize_drop` is only called once per fast thread local + // variable, except in corner cases where thread_local dtors reference + // other thread_local's, or it is being recursively initialized. + #[inline(never)] + #[cold] + unsafe fn try_initialize_drop T>(&self, init: F) -> Option<&'static T> { + // We don't put a `needs_drop` check around this and call it a day + // because this function is not inlined. Unwrapping code gets + // generated for callers of `LocalKey::with` even if we always + // return `Some` here. + match self.dtor_state.get() { + DtorState::Unregistered => { + // dtor registration happens before initialization. + register_dtor(self as *const _ as *mut u8, + destroy_value::); + self.dtor_state.set(DtorState::Registered); + } + DtorState::Registered => { + // recursively initialized + } + DtorState::RunningOrHasRun => { + return None + } } - register_dtor(self as *const _ as *mut u8, - destroy_value::); - self.dtor_registered.set(true); + Some(self.inner.initialize(init)) } } unsafe extern fn destroy_value(ptr: *mut u8) { let ptr = ptr as *mut Key; - // Right before we run the user destructor be sure to flag the - // destructor as running for this thread so calls to `get` will return - // `None`. - (*ptr).dtor_running.set(true); - - ptr::drop_in_place((*ptr).inner.get()); + + // Right before we run the user destructor be sure to set the + // `Option` to `None`, and `dtor_state` to `RunningOrHasRun`. This + // causes future calls to `get` to run `try_initialize_drop` again, + // which will now fail, and return `None`. + let value = (*ptr).inner.take(); + (*ptr).dtor_state.set(DtorState::RunningOrHasRun); + drop(value); } } #[doc(hidden)] pub mod os { - use crate::cell::{Cell, UnsafeCell}; + use super::lazy::LazyKeyInner; + use crate::cell::Cell; use crate::fmt; use crate::marker; use crate::ptr; @@ -423,8 +495,8 @@ pub mod os { unsafe impl Sync for Key { } struct Value { + inner: LazyKeyInner, key: &'static Key, - value: UnsafeCell>, } impl Key { @@ -435,24 +507,43 @@ pub mod os { } } - pub unsafe fn get(&'static self) -> Option<&'static UnsafeCell>> { + pub unsafe fn get(&'static self, init: fn() -> T) -> Option<&'static T> { let ptr = self.os.get() as *mut Value; - if !ptr.is_null() { - if ptr as usize == 1 { - return None + if ptr as usize > 1 { + match (*ptr).inner.get() { + Some(ref value) => return Some(value), + None => {}, } - return Some(&(*ptr).value); } + self.try_initialize(init) + } - // If the lookup returned null, we haven't initialized our own - // local copy, so do that now. - let ptr: Box> = box Value { - key: self, - value: UnsafeCell::new(None), + // `try_initialize` is only called once per os thread local variable, + // except in corner cases where thread_local dtors reference other + // thread_local's, or it is being recursively initialized. + unsafe fn try_initialize(&'static self, init: fn() -> T) -> Option<&'static T> { + let ptr = self.os.get() as *mut Value; + if ptr as usize == 1 { + // destructor is running + return None + } + + let ptr = if ptr.is_null() { + // If the lookup returned null, we haven't initialized our own + // local copy, so do that now. + let ptr: Box> = box Value { + inner: LazyKeyInner::new(), + key: self, + }; + let ptr = Box::into_raw(ptr); + self.os.set(ptr as *mut u8); + ptr + } else { + // recursive initialization + ptr }; - let ptr = Box::into_raw(ptr); - self.os.set(ptr as *mut u8); - Some(&(*ptr).value) + + Some((*ptr).inner.initialize(init)) } } diff --git a/src/test/ui/issues/issue-43733.rs b/src/test/ui/issues/issue-43733.rs index 91192e3360c2c..ba599ff7ce4db 100644 --- a/src/test/ui/issues/issue-43733.rs +++ b/src/test/ui/issues/issue-43733.rs @@ -13,11 +13,9 @@ static __KEY: std::thread::__FastLocalKeyInner = static __KEY: std::thread::__OsLocalKeyInner = std::thread::__OsLocalKeyInner::new(); -fn __getit() -> std::option::Option< - &'static std::cell::UnsafeCell< - std::option::Option>> +fn __getit(init: fn() -> Foo) -> std::option::Option<&'static Foo> { - __KEY.get() //~ ERROR call to unsafe function is unsafe + __KEY.get(init) //~ ERROR call to unsafe function is unsafe } static FOO: std::thread::LocalKey = diff --git a/src/test/ui/issues/issue-43733.stderr b/src/test/ui/issues/issue-43733.stderr index c48f9baf27194..0ea931d7fb6e3 100644 --- a/src/test/ui/issues/issue-43733.stderr +++ b/src/test/ui/issues/issue-43733.stderr @@ -1,13 +1,13 @@ error[E0133]: call to unsafe function is unsafe and requires unsafe function or block - --> $DIR/issue-43733.rs:20:5 + --> $DIR/issue-43733.rs:18:5 | -LL | __KEY.get() - | ^^^^^^^^^^^ call to unsafe function +LL | __KEY.get(init) + | ^^^^^^^^^^^^^^^ call to unsafe function | = note: consult the function's documentation for information on how to avoid undefined behavior error[E0133]: call to unsafe function is unsafe and requires unsafe function or block - --> $DIR/issue-43733.rs:24:5 + --> $DIR/issue-43733.rs:22:5 | LL | std::thread::LocalKey::new(__getit, Default::default); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function From b266ba7850d3996b0f19f66d7a28478acfdcdbaf Mon Sep 17 00:00:00 2001 From: tyler Date: Thu, 2 May 2019 23:27:03 -0700 Subject: [PATCH 06/15] update test to match new doc(hidden) thread_local api --- src/test/ui/issues/issue-43733.rs | 6 +++--- src/test/ui/issues/issue-43733.stderr | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/ui/issues/issue-43733.rs b/src/test/ui/issues/issue-43733.rs index ba599ff7ce4db..a602d7667c48d 100644 --- a/src/test/ui/issues/issue-43733.rs +++ b/src/test/ui/issues/issue-43733.rs @@ -13,13 +13,13 @@ static __KEY: std::thread::__FastLocalKeyInner = static __KEY: std::thread::__OsLocalKeyInner = std::thread::__OsLocalKeyInner::new(); -fn __getit(init: fn() -> Foo) -> std::option::Option<&'static Foo> +fn __getit() -> std::option::Option<&'static Foo> { - __KEY.get(init) //~ ERROR call to unsafe function is unsafe + __KEY.get(Default::default) //~ ERROR call to unsafe function is unsafe } static FOO: std::thread::LocalKey = - std::thread::LocalKey::new(__getit, Default::default); + std::thread::LocalKey::new(__getit); //~^ ERROR call to unsafe function is unsafe fn main() { diff --git a/src/test/ui/issues/issue-43733.stderr b/src/test/ui/issues/issue-43733.stderr index 0ea931d7fb6e3..ee6a3b065d6ee 100644 --- a/src/test/ui/issues/issue-43733.stderr +++ b/src/test/ui/issues/issue-43733.stderr @@ -1,16 +1,16 @@ error[E0133]: call to unsafe function is unsafe and requires unsafe function or block --> $DIR/issue-43733.rs:18:5 | -LL | __KEY.get(init) - | ^^^^^^^^^^^^^^^ call to unsafe function +LL | __KEY.get(Default::default) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function | = note: consult the function's documentation for information on how to avoid undefined behavior error[E0133]: call to unsafe function is unsafe and requires unsafe function or block --> $DIR/issue-43733.rs:22:5 | -LL | std::thread::LocalKey::new(__getit, Default::default); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function +LL | std::thread::LocalKey::new(__getit); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ call to unsafe function | = note: consult the function's documentation for information on how to avoid undefined behavior From 516c03d5102d0ffb5cc1b86451d837c26d2a0f6a Mon Sep 17 00:00:00 2001 From: tyler Date: Fri, 3 May 2019 09:46:16 -0700 Subject: [PATCH 07/15] fix another test --- src/test/compile-fail/issue-43733-2.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/compile-fail/issue-43733-2.rs b/src/test/compile-fail/issue-43733-2.rs index 2943089674d5e..8e3c93ac251e8 100644 --- a/src/test/compile-fail/issue-43733-2.rs +++ b/src/test/compile-fail/issue-43733-2.rs @@ -23,6 +23,6 @@ use std::thread::__FastLocalKeyInner as Key; static __KEY: Key<()> = Key::new(); //~^ ERROR `std::cell::UnsafeCell>` cannot be shared between threads -//~| ERROR `std::cell::Cell` cannot be shared between threads safely [E0277] +//~| ERROR `std::cell::Cell` cannot be shared between threads safely [E0277] fn main() {} From f5e56eeff6e0ed186410a358db565ab959648aba Mon Sep 17 00:00:00 2001 From: tyler Date: Fri, 3 May 2019 16:26:54 -0700 Subject: [PATCH 08/15] clang tidy fixes --- src/test/compile-fail/issue-43733-2.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/compile-fail/issue-43733-2.rs b/src/test/compile-fail/issue-43733-2.rs index 8e3c93ac251e8..c14592187ab76 100644 --- a/src/test/compile-fail/issue-43733-2.rs +++ b/src/test/compile-fail/issue-43733-2.rs @@ -23,6 +23,7 @@ use std::thread::__FastLocalKeyInner as Key; static __KEY: Key<()> = Key::new(); //~^ ERROR `std::cell::UnsafeCell>` cannot be shared between threads -//~| ERROR `std::cell::Cell` cannot be shared between threads safely [E0277] +//~| ERROR `std::cell::Cell` cannot be shared between threads +// safely [E0277] fn main() {} From 060d8bb6b014f9e9c8b697c5ecd6d86159f122b9 Mon Sep 17 00:00:00 2001 From: tyler Date: Fri, 3 May 2019 17:01:53 -0700 Subject: [PATCH 09/15] add #[allow(unused)] --- src/libstd/thread/local.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 7ff81bd9a1732..a56a34deba22e 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -311,6 +311,7 @@ mod lazy { } } + #[allow(unused)] pub unsafe fn take(&mut self) -> Option { (*self.inner.get()).take() } From 1a7f774914d8d3c2a16e40332ad43270c461ec71 Mon Sep 17 00:00:00 2001 From: tyler Date: Fri, 10 May 2019 17:29:43 -0700 Subject: [PATCH 10/15] - remove unnecessary inlines - add comment explaining that the fast::Key data structure was carefully constructed for fast access on OSX - remove inline(never) from the initializer for types where `needs_drop::()` is false --- src/libstd/thread/local.rs | 39 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index a56a34deba22e..e6f096a8da539 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -275,7 +275,6 @@ mod lazy { } } - #[inline] pub unsafe fn get(&self) -> Option<&'static T> { (*self.inner.get()).as_ref() } @@ -346,7 +345,6 @@ pub mod statik { } } - #[inline] pub unsafe fn get(&self, init: fn() -> T) -> Option<&'static T> { let value = match self.inner.get() { Some(ref value) => value, @@ -373,6 +371,11 @@ pub mod fast { RunningOrHasRun, } + // This data structure has been carefully constructed so that the fast path + // only contains one branch on x86. That optimization is necessary to avoid + // duplicated tls lookups on OSX. + // + // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 pub struct Key { // If `LazyKeyInner::get` returns `None`, that indicates either: // * The value has never been initialized @@ -403,38 +406,32 @@ pub mod fast { } } - #[inline] pub unsafe fn get T>(&self, init: F) -> Option<&'static T> { match self.inner.get() { Some(val) => Some(val), - None => { - if mem::needs_drop::() { - self.try_initialize_drop(init) - } else { - Some(self.try_initialize_nodrop(init)) - } - } + None => self.try_initialize(init), } } - // `try_initialize_nodrop` is only called once per fast thread local - // variable, except in corner cases where it is being recursively - // initialized. - // - // Macos: Inlining this function causes two `tlv_get_addr` calls to be - // performed for every call to `Key::get`. - // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 - #[inline(never)] + // `try_initialize` is only called once per fast thread local variable, + // except in corner cases where it is being recursively initialized. #[cold] - unsafe fn try_initialize_nodrop T>(&self, init: F) -> &'static T { - self.inner.initialize(init) + unsafe fn try_initialize T>(&self, init: F) -> Option<&'static T> { + if mem::needs_drop::() { + self.try_initialize_drop(init) + } else { + Some(self.inner.initialize(init)) + } } // `try_initialize_drop` is only called once per fast thread local // variable, except in corner cases where thread_local dtors reference // other thread_local's, or it is being recursively initialized. + // + // Macos: Inlining this function causes two `tlv_get_addr` calls to be + // performed for every call to `Key::get`. + // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 #[inline(never)] - #[cold] unsafe fn try_initialize_drop T>(&self, init: F) -> Option<&'static T> { // We don't put a `needs_drop` check around this and call it a day // because this function is not inlined. Unwrapping code gets From c3241d0ba03ff1e90ffdb4cd434660f81194b438 Mon Sep 17 00:00:00 2001 From: tyler Date: Sat, 11 May 2019 10:14:40 -0700 Subject: [PATCH 11/15] cold was necessary on try_initialize_nodrop to get more straight line asm --- src/libstd/thread/local.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index e6f096a8da539..998d9dcc68333 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -432,6 +432,7 @@ pub mod fast { // performed for every call to `Key::get`. // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 #[inline(never)] + #[cold] unsafe fn try_initialize_drop T>(&self, init: F) -> Option<&'static T> { // We don't put a `needs_drop` check around this and call it a day // because this function is not inlined. Unwrapping code gets From 2b3642b95b04b4078405f4bc20ba537ce5512c00 Mon Sep 17 00:00:00 2001 From: tyler Date: Sat, 11 May 2019 10:42:44 -0700 Subject: [PATCH 12/15] remove trailing whitespace --- src/libstd/thread/local.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 998d9dcc68333..1a12457646a1b 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -374,7 +374,7 @@ pub mod fast { // This data structure has been carefully constructed so that the fast path // only contains one branch on x86. That optimization is necessary to avoid // duplicated tls lookups on OSX. - // + // // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 pub struct Key { // If `LazyKeyInner::get` returns `None`, that indicates either: From 9289d03c9d263ac32a9dd0a5c581779fe1def7d3 Mon Sep 17 00:00:00 2001 From: tyler Date: Wed, 15 May 2019 07:18:24 -0700 Subject: [PATCH 13/15] llvm makes good inlining choices with only the #[cold] attribute --- src/libstd/thread/local.rs | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 1a12457646a1b..733c772a1f5f7 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -414,46 +414,42 @@ pub mod fast { } // `try_initialize` is only called once per fast thread local variable, - // except in corner cases where it is being recursively initialized. + // except in corner cases where thread_local dtors reference other + // thread_local's, or it is being recursively initialized. + // + // Macos: Inlining this function can cause two `tlv_get_addr` calls to + // be performed for every call to `Key::get`. The #[cold] hint makes + // that less likely. + // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 #[cold] unsafe fn try_initialize T>(&self, init: F) -> Option<&'static T> { - if mem::needs_drop::() { - self.try_initialize_drop(init) - } else { + if !mem::needs_drop::() || self.try_register_dtor() { Some(self.inner.initialize(init)) + } else { + None } } - // `try_initialize_drop` is only called once per fast thread local + // `try_register_dtor` is only called once per fast thread local // variable, except in corner cases where thread_local dtors reference // other thread_local's, or it is being recursively initialized. - // - // Macos: Inlining this function causes two `tlv_get_addr` calls to be - // performed for every call to `Key::get`. - // LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722 - #[inline(never)] - #[cold] - unsafe fn try_initialize_drop T>(&self, init: F) -> Option<&'static T> { - // We don't put a `needs_drop` check around this and call it a day - // because this function is not inlined. Unwrapping code gets - // generated for callers of `LocalKey::with` even if we always - // return `Some` here. + unsafe fn try_register_dtor(&self) -> bool { match self.dtor_state.get() { DtorState::Unregistered => { // dtor registration happens before initialization. register_dtor(self as *const _ as *mut u8, destroy_value::); self.dtor_state.set(DtorState::Registered); + true } DtorState::Registered => { // recursively initialized + true } DtorState::RunningOrHasRun => { - return None + false } } - - Some(self.inner.initialize(init)) } } From 2c3796172ba2d41f15294e116cd8d993f026b7c2 Mon Sep 17 00:00:00 2001 From: tyler Date: Wed, 15 May 2019 14:35:24 -0700 Subject: [PATCH 14/15] fix wasm unused import in thread local implementation --- src/libstd/thread/local.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libstd/thread/local.rs b/src/libstd/thread/local.rs index 733c772a1f5f7..9b355aa2023ff 100644 --- a/src/libstd/thread/local.rs +++ b/src/libstd/thread/local.rs @@ -323,7 +323,6 @@ mod lazy { #[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))] pub mod statik { use super::lazy::LazyKeyInner; - use crate::cell::UnsafeCell; use crate::fmt; pub struct Key { From b148c25cacd05a0268537ead29881357317a4073 Mon Sep 17 00:00:00 2001 From: tyler Date: Wed, 19 Jun 2019 11:50:23 -0700 Subject: [PATCH 15/15] fix compile-fail test for targets without thread locals --- src/test/compile-fail/issue-43733-2.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/test/compile-fail/issue-43733-2.rs b/src/test/compile-fail/issue-43733-2.rs index c14592187ab76..dae27c4785f15 100644 --- a/src/test/compile-fail/issue-43733-2.rs +++ b/src/test/compile-fail/issue-43733-2.rs @@ -5,7 +5,7 @@ #[cfg(not(target_thread_local))] struct Key { _data: std::cell::UnsafeCell>, - _flag: std::cell::Cell, + _flag: std::cell::Cell<()>, } #[cfg(not(target_thread_local))] @@ -13,7 +13,7 @@ impl Key { const fn new() -> Self { Key { _data: std::cell::UnsafeCell::new(None), - _flag: std::cell::Cell::new(false), + _flag: std::cell::Cell::new(()), } } } @@ -23,7 +23,6 @@ use std::thread::__FastLocalKeyInner as Key; static __KEY: Key<()> = Key::new(); //~^ ERROR `std::cell::UnsafeCell>` cannot be shared between threads -//~| ERROR `std::cell::Cell` cannot be shared between threads -// safely [E0277] +//~| ERROR cannot be shared between threads safely [E0277] fn main() {}