Skip to content

Commit 838c549

Browse files
committed
Document tagged pointers better
1 parent 6f9b15c commit 838c549

File tree

3 files changed

+130
-31
lines changed

3 files changed

+130
-31
lines changed

compiler/rustc_data_structures/src/tagged_ptr.rs

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,17 @@
33
//! In order to utilize the pointer packing, you must have two types: a pointer,
44
//! and a tag.
55
//!
6-
//! The pointer must implement the `Pointer` trait, with the primary requirement
7-
//! being conversion to and from a usize. Note that the pointer must be
8-
//! dereferenceable, so raw pointers generally cannot implement the `Pointer`
9-
//! trait. This implies that the pointer must also be nonzero.
6+
//! The pointer must implement the [`Pointer`] trait, with the primary
7+
//! requirement being conversion to and from a raw pointer. Note that the
8+
//! pointer must be dereferenceable, so raw pointers generally cannot implement
9+
//! the [`Pointer`] trait. This implies that the pointer must also be non-null.
1010
//!
11-
//! Many common pointer types already implement the `Pointer` trait.
11+
//! Many common pointer types already implement the [`Pointer`] trait.
1212
//!
13-
//! The tag must implement the `Tag` trait. We assert that the tag and `Pointer`
14-
//! are compatible at compile time.
13+
//! The tag must implement the [`Tag`] trait.
14+
//!
15+
//! We assert that the tag and the [`Pointer`] types are compatible at compile
16+
//! time.
1517
1618
use std::ops::Deref;
1719
use std::ptr::NonNull;
@@ -71,32 +73,66 @@ pub unsafe trait Pointer: Deref {
7173
/// [`Self::Target`]: Deref::Target
7274
const BITS: usize;
7375

76+
/// Turns this pointer into a raw, non-null pointer.
77+
///
78+
/// The inverse of this function is [`from_ptr`].
79+
///
80+
/// This function guarantees that the least-significant [`Self::BITS`] bits
81+
/// are zero.
82+
///
83+
/// [`from_ptr`]: Pointer::from_ptr
84+
/// [`Self::BITS`]: Pointer::BITS
7485
fn into_ptr(self) -> NonNull<Self::Target>;
7586

87+
/// Re-creates the original pointer, from a raw pointer returned by [`into_ptr`].
88+
///
7689
/// # Safety
7790
///
78-
/// The passed `ptr` must be returned from `into_usize`.
91+
/// The passed `ptr` must be returned from [`into_ptr`].
92+
///
93+
/// This acts as [`ptr::read::<Self>()`] semantically; it should not be called more than
94+
/// once on non-[`Copy`] `Pointer`s.
7995
///
80-
/// This acts as `ptr::read` semantically, it should not be called more than
81-
/// once on non-`Copy` `Pointer`s.
96+
/// [`into_ptr`]: Pointer::into_ptr
97+
/// [`ptr::read::<Self>()`]: std::ptr::read
8298
unsafe fn from_ptr(ptr: NonNull<Self::Target>) -> Self;
8399
}
84100

85-
/// This describes tags that the `TaggedPtr` struct can hold.
101+
/// This describes tags that the [`TaggedPtr`] struct can hold.
86102
///
87103
/// # Safety
88104
///
89-
/// The BITS constant must be correct.
105+
/// The [`BITS`] constant must be correct.
106+
///
107+
/// No more than [`BITS`] least significant bits may be set in the returned usize.
90108
///
91-
/// No more than `BITS` least significant bits may be set in the returned usize.
109+
/// [`BITS`]: Tag::BITS
92110
pub unsafe trait Tag: Copy {
111+
/// Number of least-significant bits in the return value of [`into_usize`]
112+
/// which may be non-zero. In other words this is the bit width of the
113+
/// value.
114+
///
115+
/// [`into_usize`]: Tag::into_usize
93116
const BITS: usize;
94117

118+
/// Turns this tag into an integer.
119+
///
120+
/// The inverse of this function is [`from_usize`].
121+
///
122+
/// This function guarantees that only the least-significant [`Self::BITS`]
123+
/// bits can be non-zero.
124+
///
125+
/// [`from_usize`]: Tag::from_usize
126+
/// [`Self::BITS`]: Tag::BITS
95127
fn into_usize(self) -> usize;
96128

129+
/// Re-creates the tag from the integer returned by [`into_usize`].
130+
///
97131
/// # Safety
98132
///
99-
/// The passed `tag` must be returned from `into_usize`.
133+
/// The passed `tag` must be returned from [`into_usize`].
134+
///
135+
/// [`into_usize`]: Tag::into_usize
100136
unsafe fn from_usize(tag: usize) -> Self;
101137
}
102138

@@ -111,6 +147,7 @@ unsafe impl<T: ?Sized + Aligned> Pointer for Box<T> {
111147

112148
#[inline]
113149
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
150+
// Safety: `ptr` comes from `into_ptr` which calls `Box::into_raw`
114151
Box::from_raw(ptr.as_ptr())
115152
}
116153
}
@@ -120,11 +157,13 @@ unsafe impl<T: ?Sized + Aligned> Pointer for Rc<T> {
120157

121158
#[inline]
122159
fn into_ptr(self) -> NonNull<T> {
160+
// Safety: pointers from `Rc::into_raw` are valid & non-null
123161
unsafe { NonNull::new_unchecked(Rc::into_raw(self).cast_mut()) }
124162
}
125163

126164
#[inline]
127165
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
166+
// Safety: `ptr` comes from `into_ptr` which calls `Rc::into_raw`
128167
Rc::from_raw(ptr.as_ptr())
129168
}
130169
}
@@ -134,11 +173,13 @@ unsafe impl<T: ?Sized + Aligned> Pointer for Arc<T> {
134173

135174
#[inline]
136175
fn into_ptr(self) -> NonNull<T> {
176+
// Safety: pointers from `Arc::into_raw` are valid & non-null
137177
unsafe { NonNull::new_unchecked(Arc::into_raw(self).cast_mut()) }
138178
}
139179

140180
#[inline]
141181
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
182+
// Safety: `ptr` comes from `into_ptr` which calls `Arc::into_raw`
142183
Arc::from_raw(ptr.as_ptr())
143184
}
144185
}
@@ -153,6 +194,8 @@ unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a T {
153194

154195
#[inline]
155196
unsafe fn from_ptr(ptr: NonNull<T>) -> Self {
197+
// Safety:
198+
// `ptr` comes from `into_ptr` which gets the pointer from a reference
156199
ptr.as_ref()
157200
}
158201
}
@@ -167,6 +210,8 @@ unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a mut T {
167210

168211
#[inline]
169212
unsafe fn from_ptr(mut ptr: NonNull<T>) -> Self {
213+
// Safety:
214+
// `ptr` comes from `into_ptr` which gets the pointer from a reference
170215
ptr.as_mut()
171216
}
172217
}

compiler/rustc_data_structures/src/tagged_ptr/copy.rs

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,75 @@ use std::num::NonZeroUsize;
88
use std::ops::{Deref, DerefMut};
99
use std::ptr::NonNull;
1010

11-
/// A `Copy` TaggedPtr.
11+
/// A [`Copy`] tagged pointer.
1212
///
13-
/// You should use this instead of the `TaggedPtr` type in all cases where
14-
/// `P: Copy`.
13+
/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer.
14+
///
15+
/// You should use this instead of the [`TaggedPtr`] type in all cases where
16+
/// `P` implements [`Copy`].
1517
///
1618
/// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without
17-
/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that,
18-
/// wrap the TaggedPtr.
19+
/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`];
20+
/// if you want that, wrap the [`CopyTaggedPtr`].
21+
///
22+
/// [`TaggedPtr`]: crate::tagged_ptr::TaggedPtr
1923
pub struct CopyTaggedPtr<P, T, const COMPARE_PACKED: bool>
2024
where
2125
P: Pointer,
2226
T: Tag,
2327
{
28+
/// This is semantically a pair of `pointer: P` and `tag: T` fields,
29+
/// however we pack them in a single pointer, to save space.
30+
///
31+
/// We pack the tag into the **most**-significant bits of the pointer to
32+
/// ease retrieval of the value. A left shift is a multiplication and
33+
/// those are embeddable in instruction encoding, for example:
34+
///
35+
/// ```asm
36+
/// // (https://godbolt.org/z/jqcYPWEr3)
37+
/// example::shift_read3:
38+
/// mov eax, dword ptr [8*rdi]
39+
/// ret
40+
///
41+
/// example::mask_read3:
42+
/// and rdi, -8
43+
/// mov eax, dword ptr [rdi]
44+
/// ret
45+
/// ```
46+
///
47+
/// This is ASM outputted by rustc for reads of values behind tagged
48+
/// pointers for different approaches of tagging:
49+
/// - `shift_read3` uses `<< 3` (the tag is in the most-significant bits)
50+
/// - `mask_read3` uses `& !0b111` (the tag is in the least-significant bits)
51+
///
52+
/// The shift approach thus produces fewer instructions and is likely faster.
53+
///
54+
/// Encoding diagram:
55+
/// ```text
56+
/// [ packed.addr ]
57+
/// [ tag ] [ pointer.addr >> T::BITS ] <-- usize::BITS - T::BITS bits
58+
/// ^
59+
/// |
60+
/// T::BITS bits
61+
/// ```
62+
///
63+
/// The tag can be retrieved by `packed.addr() >> (usize::BITS - T::BITS)` and the pointer
64+
/// can be retrieved by `packed.map_addr(|addr| addr << T::BITS)`.
2465
packed: NonNull<P::Target>,
2566
tag_ghost: PhantomData<T>,
2667
}
2768

28-
// We pack the tag into the *upper* bits of the pointer to ease retrieval of the
29-
// value; a left shift is a multiplication and those are embeddable in
30-
// instruction encoding.
3169
impl<P, T, const CP: bool> CopyTaggedPtr<P, T, CP>
3270
where
3371
P: Pointer,
3472
T: Tag,
3573
{
74+
/// Tags `pointer` with `tag`.
3675
pub fn new(pointer: P, tag: T) -> Self {
3776
Self { packed: Self::pack(P::into_ptr(pointer), tag), tag_ghost: PhantomData }
3877
}
3978

79+
/// Retrieves the pointer.
4080
pub fn pointer(self) -> P
4181
where
4282
P: Copy,
@@ -48,11 +88,18 @@ where
4888
unsafe { P::from_ptr(self.pointer_raw()) }
4989
}
5090

91+
/// Retrieves the tag.
5192
#[inline]
5293
pub fn tag(&self) -> T {
53-
unsafe { T::from_usize(self.packed.addr().get() >> Self::TAG_BIT_SHIFT) }
94+
// Unpack the tag, according to the `self.packed` encoding scheme
95+
let tag = self.packed.addr().get() >> Self::TAG_BIT_SHIFT;
96+
97+
// Safety:
98+
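// The shift discards the pointer bits and undoes the `<< Self::TAG_BIT_SHIFT` done in `pack`, leaving the value returned by `T::into_usize`.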
99+
unsafe { T::from_usize(tag) }
54100
}
55101

102+
/// Sets the tag to a new value.
56103
#[inline]
57104
pub fn set_tag(&mut self, tag: T) {
58105
self.packed = Self::pack(self.pointer_raw(), tag);
@@ -61,7 +108,8 @@ where
61108
const TAG_BIT_SHIFT: usize = usize::BITS as usize - T::BITS;
62109
const ASSERTION: () = { assert!(T::BITS <= P::BITS) };
63110

64-
/// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`.
111+
/// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`,
112+
/// according to `self.packed` encoding scheme.
65113
///
66114
/// [`P::into_ptr`]: Pointer::into_ptr
67115
fn pack(ptr: NonNull<P::Target>, tag: T) -> NonNull<P::Target> {
@@ -71,7 +119,7 @@ where
71119
let packed_tag = tag.into_usize() << Self::TAG_BIT_SHIFT;
72120

73121
ptr.map_addr(|addr| {
74-
// SAFETY:
122+
// Safety:
75123
// - The pointer is `NonNull` => its address is `NonZeroUsize`
76124
// - `P::BITS` least significant bits are always zero (`Pointer` contract)
77125
// - `T::BITS <= P::BITS` (from `Self::ASSERTION`)
@@ -85,6 +133,7 @@ where
85133
})
86134
}
87135

136+
/// Retrieves the original raw pointer from `self.packed`.
88137
pub(super) fn pointer_raw(&self) -> NonNull<P::Target> {
89138
self.packed.map_addr(|addr| unsafe { NonZeroUsize::new_unchecked(addr.get() << T::BITS) })
90139
}

compiler/rustc_data_structures/src/tagged_ptr/drop.rs

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@ use super::CopyTaggedPtr;
66
use super::{Pointer, Tag};
77
use crate::stable_hasher::{HashStable, StableHasher};
88

9-
/// A TaggedPtr implementing `Drop`.
9+
/// A tagged pointer that supports pointers that implement [`Drop`].
10+
///
11+
/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer.
12+
///
13+
/// You should use [`CopyTaggedPtr`] instead of this type in all cases
14+
/// where `P` implements [`Copy`].
1015
///
1116
/// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without
12-
/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that,
13-
/// wrap the TaggedPtr.
17+
/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`];
18+
/// if you want that, wrap the [`TaggedPtr`].
1419
pub struct TaggedPtr<P, T, const COMPARE_PACKED: bool>
1520
where
1621
P: Pointer,
@@ -19,22 +24,22 @@ where
1924
raw: CopyTaggedPtr<P, T, COMPARE_PACKED>,
2025
}
2126

22-
// We pack the tag into the *upper* bits of the pointer to ease retrieval of the
23-
// value; a right shift is a multiplication and those are embeddable in
24-
// instruction encoding.
2527
impl<P, T, const CP: bool> TaggedPtr<P, T, CP>
2628
where
2729
P: Pointer,
2830
T: Tag,
2931
{
32+
/// Tags `pointer` with `tag`.
3033
pub fn new(pointer: P, tag: T) -> Self {
3134
TaggedPtr { raw: CopyTaggedPtr::new(pointer, tag) }
3235
}
3336

37+
/// Retrieves the tag.
3438
pub fn tag(&self) -> T {
3539
self.raw.tag()
3640
}
3741

42+
/// Sets the tag to a new value.
3843
pub fn set_tag(&mut self, tag: T) {
3944
self.raw.set_tag(tag)
4045
}

0 commit comments

Comments
 (0)