Skip to content

Commit fcfecc1

Browse files
committed
Implement word-sized copy
1 parent 0c41d60 commit fcfecc1

File tree

1 file changed

+208
-16
lines changed

1 file changed

+208
-16
lines changed

src/mem/impls.rs

Lines changed: 208 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,219 @@
1+
use core::intrinsics::likely;
2+
3+
const WORD_SIZE: usize = core::mem::size_of::<usize>();
const WORD_MASK: usize = WORD_SIZE - 1;

// If the number of bytes involved exceeds this threshold we opt into the word-wise copy.
// The value selected here is max(2 * WORD_SIZE, 16):
// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied
//   through the word-wise loop (aligning `dest` and trimming the tail can each consume up
//   to WORD_SIZE - 1 bytes).
// * The word-wise copy logic needs to perform some checks, so it has a small fixed
//   overhead. The floor of 16 ensures that even on 32-bit platforms at least 8 bytes go
//   through the word-wise loop, so its savings outweigh that fixed overhead.
const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
    2 * WORD_SIZE
} else {
    16
};
18+
119
#[inline(always)]
2-
pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, n: usize) {
3-
let mut i = 0;
4-
while i < n {
5-
*dest.add(i) = *src.add(i);
6-
i += 1;
20+
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
21+
#[inline(always)]
22+
unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
23+
let dest_end = dest.add(n);
24+
while dest < dest_end {
25+
*dest = *src;
26+
dest = dest.add(1);
27+
src = src.add(1);
28+
}
29+
}
30+
31+
#[inline(always)]
32+
unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
33+
let mut dest_usize = dest as *mut usize;
34+
let mut src_usize = src as *mut usize;
35+
let dest_end = dest.add(n) as *mut usize;
36+
37+
while dest_usize < dest_end {
38+
*dest_usize = *src_usize;
39+
dest_usize = dest_usize.add(1);
40+
src_usize = src_usize.add(1);
41+
}
42+
}
43+
44+
#[inline(always)]
45+
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
46+
let mut dest_usize = dest as *mut usize;
47+
let dest_end = dest.add(n) as *mut usize;
48+
49+
// Calculate the misalignment offset and shift needed to reassemble value.
50+
let offset = src as usize & WORD_MASK;
51+
let shift = offset * 8;
52+
53+
// Realign src
54+
let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
55+
// XXX: Could this possibly be UB?
56+
let mut prev_word = *src_aligned;
57+
58+
while dest_usize < dest_end {
59+
src_aligned = src_aligned.add(1);
60+
let cur_word = *src_aligned;
61+
#[cfg(target_endian = "little")]
62+
let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
63+
#[cfg(target_endian = "big")]
64+
let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
65+
prev_word = cur_word;
66+
67+
*dest_usize = resembled;
68+
dest_usize = dest_usize.add(1);
69+
}
770
}
71+
72+
if n >= WORD_COPY_THRESHOLD {
73+
// Align dest
74+
// Because of n >= 2 * WORD_SIZE, dst_misalignment < n
75+
let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
76+
copy_forward_bytes(dest, src, dest_misalignment);
77+
dest = dest.add(dest_misalignment);
78+
src = src.add(dest_misalignment);
79+
n -= dest_misalignment;
80+
81+
let n_words = n & !WORD_MASK;
82+
let src_misalignment = src as usize & WORD_MASK;
83+
if likely(src_misalignment == 0) {
84+
copy_forward_aligned_words(dest, src, n_words);
85+
} else {
86+
copy_forward_misaligned_words(dest, src, n_words);
87+
}
88+
dest = dest.add(n_words);
89+
src = src.add(n_words);
90+
n -= n_words;
91+
}
92+
copy_forward_bytes(dest, src, n);
893
}
994

1095
#[inline(always)]
11-
pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) {
12-
// copy from end
13-
let mut i = n;
14-
while i != 0 {
15-
i -= 1;
16-
*dest.add(i) = *src.add(i);
96+
pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
97+
// The following backward copy helper functions uses the pointers past the end
98+
// as their inputs instead of pointers to the start!
99+
#[inline(always)]
100+
unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
101+
let dest_start = dest.sub(n);
102+
while dest_start < dest {
103+
dest = dest.sub(1);
104+
src = src.sub(1);
105+
*dest = *src;
106+
}
107+
}
108+
109+
#[inline(always)]
110+
unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
111+
let mut dest_usize = dest as *mut usize;
112+
let mut src_usize = src as *mut usize;
113+
let dest_start = dest.sub(n) as *mut usize;
114+
115+
while dest_start < dest_usize {
116+
dest_usize = dest_usize.sub(1);
117+
src_usize = src_usize.sub(1);
118+
*dest_usize = *src_usize;
119+
}
17120
}
121+
122+
#[inline(always)]
123+
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
124+
let mut dest_usize = dest as *mut usize;
125+
let dest_start = dest.sub(n) as *mut usize;
126+
127+
// Calculate the misalignment offset and shift needed to reassemble value.
128+
let offset = src as usize & WORD_MASK;
129+
let shift = offset * 8;
130+
131+
// Realign src_aligned
132+
let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
133+
// XXX: Could this possibly be UB?
134+
let mut prev_word = *src_aligned;
135+
136+
while dest_start < dest_usize {
137+
src_aligned = src_aligned.sub(1);
138+
let cur_word = *src_aligned;
139+
#[cfg(target_endian = "little")]
140+
let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
141+
#[cfg(target_endian = "big")]
142+
let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
143+
prev_word = cur_word;
144+
145+
dest_usize = dest_usize.sub(1);
146+
*dest_usize = resembled;
147+
}
148+
}
149+
150+
let mut dest = dest.add(n);
151+
let mut src = src.add(n);
152+
153+
if n >= WORD_COPY_THRESHOLD {
154+
// Align dest
155+
// Because of n >= 2 * WORD_SIZE, dst_misalignment < n
156+
let dest_misalignment = dest as usize & WORD_MASK;
157+
copy_backward_bytes(dest, src, dest_misalignment);
158+
dest = dest.sub(dest_misalignment);
159+
src = src.sub(dest_misalignment);
160+
n -= dest_misalignment;
161+
162+
let n_words = n & !WORD_MASK;
163+
let src_misalignment = src as usize & WORD_MASK;
164+
if likely(src_misalignment == 0) {
165+
copy_backward_aligned_words(dest, src, n_words);
166+
} else {
167+
copy_backward_misaligned_words(dest, src, n_words);
168+
}
169+
dest = dest.sub(n_words);
170+
src = src.sub(n_words);
171+
n -= n_words;
172+
}
173+
copy_backward_bytes(dest, src, n);
18174
}
19175

20176
#[inline(always)]
21-
pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) {
22-
let mut i = 0;
23-
while i < n {
24-
*s.add(i) = c;
25-
i += 1;
177+
pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
178+
#[inline(always)]
179+
pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) {
180+
let end = s.add(n);
181+
while s < end {
182+
*s = c;
183+
s = s.add(1);
184+
}
185+
}
186+
187+
#[inline(always)]
188+
pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) {
189+
let mut broadcast = c as usize;
190+
let mut bits = 8;
191+
while bits < WORD_SIZE * 8 {
192+
broadcast |= broadcast << bits;
193+
bits *= 2;
194+
}
195+
196+
let mut s_usize = s as *mut usize;
197+
let end = s.add(n) as *mut usize;
198+
199+
while s_usize < end {
200+
*s_usize = broadcast;
201+
s_usize = s_usize.add(1);
202+
}
203+
}
204+
205+
if likely(n >= WORD_COPY_THRESHOLD) {
206+
// Align s
207+
// Because of n >= 2 * WORD_SIZE, dst_misalignment < n
208+
let misalignment = (s as usize).wrapping_neg() & WORD_MASK;
209+
set_bytes_bytes(s, c, misalignment);
210+
s = s.add(misalignment);
211+
n -= misalignment;
212+
213+
let n_words = n & !WORD_MASK;
214+
set_bytes_words(s, c, n_words);
215+
s = s.add(n_words);
216+
n -= n_words;
26217
}
218+
set_bytes_bytes(s, c, n);
27219
}

0 commit comments

Comments
 (0)