1
1
use crate :: mem:: { self , MaybeUninit , SizedTypeProperties } ;
2
2
use crate :: { cmp, ptr} ;
3
3
4
+ type BufType = [ usize ; 32 ] ; // Backing storage for the stack buffer in `ptr_rotate_memmove`; its byte size caps how many elements that path may buffer.
5
+
4
6
/// Rotates the range `[mid-left, mid+right)` such that the element at `mid` becomes the first
5
7
/// element. Equivalently, rotates the range `left` elements to the left or `right` elements to the
6
8
/// right.
7
9
///
8
10
/// # Safety
9
11
///
10
12
/// The specified range must be valid for reading and writing.
13
+ #[ inline]
14
+ pub ( super ) unsafe fn ptr_rotate < T > ( left : usize , mid : * mut T , right : usize ) {
15
+ if T :: IS_ZST {
16
+ return ;
17
+ }
18
+ // Abort early if the rotate is a no-op; past this point both `left` and `right` are non-zero.
19
+ if ( left == 0 ) || ( right == 0 ) {
20
+ return ;
21
+ }
22
+ // `T` is not a zero-sized type (checked above), so it's okay to divide by its size.
23
+ if !cfg ! ( feature = "optimize_for_size" )
24
+ && cmp:: min ( left, right) <= mem:: size_of :: < BufType > ( ) / mem:: size_of :: < T > ( )
25
+ {
26
+ // SAFETY: the caller guarantees the range is valid, and the check above ensures `min(left, right)` elements fit in `BufType`.
27
+ unsafe { ptr_rotate_memmove ( left, mid, right) } ;
28
+ } else if !cfg ! ( feature = "optimize_for_size" )
29
+ && ( ( left + right < 24 ) || ( mem:: size_of :: < T > ( ) > mem:: size_of :: < [ usize ; 4 ] > ( ) ) )
30
+ {
31
+ // SAFETY: the caller guarantees the range is valid for reading and writing.
32
+ unsafe { ptr_rotate_gcd ( left, mid, right) }
33
+ } else {
34
+ // SAFETY: the caller guarantees the range is valid for reading and writing.
35
+ unsafe { ptr_rotate_swap ( left, mid, right) }
36
+ }
37
+ }
38
+
39
+ /// Algorithm 1 is used if `min(left, right)` is small enough to fit onto a stack buffer. The
40
+ /// `min(left, right)` elements are copied onto the buffer, `memmove` is applied to the others, and
41
+ /// the ones on the buffer are moved back into the hole on the opposite side of where they
42
+ /// originated.
11
43
///
12
- /// # Algorithm
44
+ /// # Safety
13
45
///
14
- /// Algorithm 1 is used for small values of `left + right` or for large `T`. The elements are moved
15
- /// into their final positions one at a time starting at `mid - left` and advancing by `right` steps
16
- /// modulo `left + right`, such that only one temporary is needed. Eventually, we arrive back at
17
- /// `mid - left`. However, if `gcd(left + right, right)` is not 1, the above steps skipped over
18
- /// elements. For example:
46
+ /// The specified range must be valid for reading and writing, and `min(left, right)` elements of `T` must fit in a `BufType`.
47
+ #[ inline]
48
+ unsafe fn ptr_rotate_memmove < T > ( left : usize , mid : * mut T , right : usize ) {
49
+ // The `[T; 0]` here is to ensure this is appropriately aligned for T
50
+ let mut rawarray = MaybeUninit :: < ( BufType , [ T ; 0 ] ) > :: uninit ( ) ;
51
+ let buf = rawarray. as_mut_ptr ( ) as * mut T ;
52
+ // SAFETY: `mid-left <= mid-left+right <= mid+right`, so `dim` stays within (or one past) the caller's range
53
+ let dim = unsafe { mid. sub ( left) . add ( right) } ;
54
+ if left <= right {
55
+ // SAFETY:
56
+ //
57
+ // 1) The size check done by the caller (`ptr_rotate`) ensures `[mid-left; left]` will fit in
58
+ // `buf` without overflow and `buf` was created just above and so cannot be
59
+ // overlapped with any value of `[mid-left; left]`
60
+ // 2) [mid-left, mid+right) are all valid for reading and writing and we don't care
61
+ // about overlaps here because `ptr::copy` has `memmove` semantics.
62
+ // 3) The `if` condition about `left <= right` ensures writing `left` elements to
63
+ // `dim = mid-left+right` is valid because:
64
+ // - `buf` is valid and `left` elements were written in it in 1)
65
+ // - `dim+left = mid-left+right+left = mid+right` and we write `[dim, dim+left)`
66
+ unsafe {
67
+ // 1)
68
+ ptr:: copy_nonoverlapping ( mid. sub ( left) , buf, left) ;
69
+ // 2)
70
+ ptr:: copy ( mid, mid. sub ( left) , right) ;
71
+ // 3)
72
+ ptr:: copy_nonoverlapping ( buf, dim, left) ;
73
+ }
74
+ } else {
75
+ // SAFETY: same reasoning as above but with `left` and `right` reversed
76
+ unsafe {
77
+ ptr:: copy_nonoverlapping ( mid, buf, right) ;
78
+ ptr:: copy ( mid. sub ( left) , dim, left) ;
79
+ ptr:: copy_nonoverlapping ( buf, mid. sub ( left) , right) ;
80
+ }
81
+ }
82
+ }
83
+
84
+ /// Algorithm 2 is used for small values of `left + right` or for large `T`. The elements
85
+ /// are moved into their final positions one at a time starting at `mid - left` and advancing by
86
+ /// `right` steps modulo `left + right`, such that only one temporary is needed. Eventually, we
87
+ /// arrive back at `mid - left`. However, if `gcd(left + right, right)` is not 1, the above steps
88
+ /// skipped over elements. For example:
19
89
/// ```text
20
90
/// left = 10, right = 6
21
91
/// the `^` indicates an element in its final place
@@ -39,41 +109,16 @@ use crate::{cmp, ptr};
39
109
/// `gcd(left + right, right)` value). The end result is that all elements are finalized once and
40
110
/// only once.
41
111
///
42
- /// Algorithm 2 is used if `left + right` is large but `min(left, right)` is small enough to
43
- /// fit onto a stack buffer. The `min(left, right)` elements are copied onto the buffer, `memmove`
44
- /// is applied to the others, and the ones on the buffer are moved back into the hole on the
45
- /// opposite side of where they originated.
46
- ///
47
- /// Algorithms that can be vectorized outperform the above once `left + right` becomes large enough.
48
- /// Algorithm 1 can be vectorized by chunking and performing many rounds at once, but there are too
112
+ /// Algorithm 2 can be vectorized by chunking and performing many rounds at once, but there are too
49
113
/// few rounds on average until `left + right` is enormous, and the worst case of a single
50
- /// round is always there. Instead, algorithm 3 utilizes repeated swapping of
51
- /// `min(left, right)` elements until a smaller rotate problem is left.
114
+ /// round is always there.
52
115
///
53
- /// ```text
54
- /// left = 11, right = 4
55
- /// [4 5 6 7 8 9 10 11 12 13 14 . 0 1 2 3]
56
- /// ^ ^ ^ ^ ^ ^ ^ ^ swapping the right most elements with elements to the left
57
- /// [4 5 6 7 8 9 10 . 0 1 2 3] 11 12 13 14
58
- /// ^ ^ ^ ^ ^ ^ ^ ^ swapping these
59
- /// [4 5 6 . 0 1 2 3] 7 8 9 10 11 12 13 14
60
- /// we cannot swap any more, but a smaller rotation problem is left to solve
61
- /// ```
62
- /// when `left < right` the swapping happens from the left instead.
63
- pub ( super ) unsafe fn ptr_rotate < T > ( mut left : usize , mut mid : * mut T , mut right : usize ) {
64
- type BufType = [ usize ; 32 ] ;
65
- if T :: IS_ZST {
66
- return ;
67
- }
68
- loop {
69
- // N.B. the below algorithms can fail if these cases are not checked
70
- if ( right == 0 ) || ( left == 0 ) {
71
- return ;
72
- }
73
- if !cfg ! ( feature = "optimize_for_size" )
74
- && ( ( left + right < 24 ) || ( mem:: size_of :: < T > ( ) > mem:: size_of :: < [ usize ; 4 ] > ( ) ) )
75
- {
76
- // Algorithm 1
116
+ /// # Safety
117
+ ///
118
+ /// The specified range must be valid for reading and writing.
119
+ #[ inline]
120
+ unsafe fn ptr_rotate_gcd < T > ( left : usize , mid : * mut T , right : usize ) {
121
+ // Algorithm 2
77
122
// Microbenchmarks indicate that the average performance for random shifts is better all
78
123
// the way until about `left + right == 32`, but the worst case performance breaks even
79
124
// around 16. 24 was chosen as middle ground. If the size of `T` is larger than 4
@@ -157,47 +202,29 @@ pub(super) unsafe fn ptr_rotate<T>(mut left: usize, mut mid: *mut T, mut right:
157
202
}
158
203
}
159
204
}
160
- return ;
161
- // `T` is not a zero-sized type, so it's okay to divide by its size.
162
- } else if !cfg ! ( feature = "optimize_for_size" )
163
- && cmp:: min ( left, right) <= mem:: size_of :: < BufType > ( ) / mem:: size_of :: < T > ( )
164
- {
165
- // Algorithm 2
166
- // The `[T; 0]` here is to ensure this is appropriately aligned for T
167
- let mut rawarray = MaybeUninit :: < ( BufType , [ T ; 0 ] ) > :: uninit ( ) ;
168
- let buf = rawarray. as_mut_ptr ( ) as * mut T ;
169
- // SAFETY: `mid-left <= mid-left+right < mid+right`
170
- let dim = unsafe { mid. sub ( left) . add ( right) } ;
171
- if left <= right {
172
- // SAFETY:
173
- //
174
- // 1) The `else if` condition about the sizes ensures `[mid-left; left]` will fit in
175
- // `buf` without overflow and `buf` was created just above and so cannot be
176
- // overlapped with any value of `[mid-left; left]`
177
- // 2) [mid-left, mid+right) are all valid for reading and writing and we don't care
178
- // about overlaps here.
179
- // 3) The `if` condition about `left <= right` ensures writing `left` elements to
180
- // `dim = mid-left+right` is valid because:
181
- // - `buf` is valid and `left` elements were written in it in 1)
182
- // - `dim+left = mid-left+right+left = mid+right` and we write `[dim, dim+left)`
183
- unsafe {
184
- // 1)
185
- ptr:: copy_nonoverlapping ( mid. sub ( left) , buf, left) ;
186
- // 2)
187
- ptr:: copy ( mid, mid. sub ( left) , right) ;
188
- // 3)
189
- ptr:: copy_nonoverlapping ( buf, dim, left) ;
190
205
}
191
- } else {
192
- // SAFETY: same reasoning as above but with `left` and `right` reversed
193
- unsafe {
194
- ptr:: copy_nonoverlapping ( mid, buf, right) ;
195
- ptr:: copy ( mid. sub ( left) , dim, left) ;
196
- ptr:: copy_nonoverlapping ( buf, mid. sub ( left) , right) ;
197
- }
198
- }
199
- return ;
200
- } else if left >= right {
206
+
207
+ /// Algorithm 3 utilizes repeated swapping of `min(left, right)` elements.
208
+ ///
209
+ /// For example:
210
+ /// ```text
211
+ /// left = 11, right = 4
212
+ /// [4 5 6 7 8 9 10 11 12 13 14 . 0 1 2 3]
213
+ /// ^ ^ ^ ^ ^ ^ ^ ^ swapping the right most elements with elements to the left
214
+ /// [4 5 6 7 8 9 10 . 0 1 2 3] 11 12 13 14
215
+ /// ^ ^ ^ ^ ^ ^ ^ ^ swapping these
216
+ /// [4 5 6 . 0 1 2 3] 7 8 9 10 11 12 13 14
217
+ /// we cannot swap any more, but a smaller rotation problem is left to solve
218
+ /// ```
219
+ /// when `left < right` the swapping happens from the left instead.
220
+ ///
221
+ /// # Safety
222
+ ///
223
+ /// The specified range must be valid for reading and writing.
224
+ #[ inline]
225
+ unsafe fn ptr_rotate_swap < T > ( mut left : usize , mut mid : * mut T , mut right : usize ) {
226
+ loop {
227
+ if left >= right {
201
228
// Algorithm 3
202
229
// There is an alternate way of swapping that involves finding where the last swap
203
230
// of this algorithm would be, and swapping using that last chunk instead of swapping
@@ -233,5 +260,8 @@ pub(super) unsafe fn ptr_rotate<T>(mut left: usize, mut mid: *mut T, mut right:
233
260
}
234
261
}
235
262
}
263
+ if ( right == 0 ) || ( left == 0 ) {
264
+ return ;
265
+ }
236
266
}
237
267
}
0 commit comments