Skip to content

Commit d453782

Browse files
committed
Change align_offset to support different strides
This is necessary if we want to implement `[T]::align_to` and is more useful in general. This implementation effort began during the All Hands and represents a month of my futile efforts to do any sort of maths. Luckily, I found the very very nice Chris McDonald (cjm) on IRC who figured out the core formulas for me! All the thanks for the existence of this PR go to them! Anyway… Those formulas were mangled by yours truly into the arcane forms you see here to squeeze out the best assembly possible on most of the modern architectures (x86 and ARM were evaluated in practice). I mean, just look at it: *one actual* modulo operation and everything else is just cheap single-cycle ops! Admittedly, the naive solution might be faster in some common scenarios, but this code absolutely butchers the naive solution in the worst-case scenario. Alas, the result of this arcane magic also means that the code relies pretty heavily on the preconditions holding true, and breaking those preconditions will unleash the UB-est of all UBs! So don’t.
1 parent 90463a6 commit d453782

File tree

7 files changed

+363
-68
lines changed

7 files changed

+363
-68
lines changed

src/libcore/intrinsics.rs

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,15 +1463,26 @@ extern "rust-intrinsic" {
14631463
/// source as well as std's catch implementation.
14641464
pub fn try(f: fn(*mut u8), data: *mut u8, local_ptr: *mut u8) -> i32;
14651465

1466-
/// Computes the byte offset that needs to be applied to `ptr` in order to
1467-
/// make it aligned to `align`.
1468-
/// If it is not possible to align `ptr`, the implementation returns
1466+
#[cfg(stage0)]
1467+
/// Stage0 variant: computes the byte offset needed to align the untyped `ptr` to `align`.
1468+
pub fn align_offset(ptr: *const (), align: usize) -> usize;
1469+
1470+
/// Computes the offset that needs to be applied to the pointer in order to make it aligned to
1471+
/// `align`.
1472+
///
1473+
/// If it is not possible to align the pointer, the implementation returns
14691474
/// `usize::max_value()`.
14701475
///
1471-
/// There are no guarantees whatsover that offsetting the pointer will not
1472-
/// overflow or go beyond the allocation that `ptr` points into.
1473-
/// It is up to the caller to ensure that the returned offset is correct
1474-
/// in all terms other than alignment.
1476+
/// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
1477+
/// used with the `offset` or `offset_to` methods.
1478+
///
1479+
/// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
1480+
/// beyond the allocation that the pointer points into. It is up to the caller to ensure that
1481+
/// the returned offset is correct in all terms other than alignment.
1482+
///
1483+
/// # Unsafety
1484+
///
1485+
/// `align` must be a power-of-two.
14751486
///
14761487
/// # Examples
14771488
///
@@ -1485,7 +1496,7 @@ extern "rust-intrinsic" {
14851496
/// # unsafe {
14861497
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
14871498
/// let ptr = &x[n] as *const u8;
1488-
/// let offset = align_offset(ptr as *const (), align_of::<u16>());
1499+
/// let offset = align_offset(ptr, align_of::<u16>());
14891500
/// if offset < x.len() - n - 1 {
14901501
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
14911502
/// assert_ne!(*u16_ptr, 500);
@@ -1495,7 +1506,8 @@ extern "rust-intrinsic" {
14951506
/// }
14961507
/// # } }
14971508
/// ```
1498-
pub fn align_offset(ptr: *const (), align: usize) -> usize;
1509+
#[cfg(not(stage0))]
1510+
pub fn align_offset<T>(ptr: *const T, align: usize) -> usize;
14991511

15001512
/// Emits a `!nontemporal` store according to LLVM (see their docs).
15011513
/// Probably will never become stable.

src/libcore/ptr.rs

Lines changed: 213 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1433,15 +1433,22 @@ impl<T: ?Sized> *const T {
14331433
copy_nonoverlapping(self, dest, count)
14341434
}
14351435

1436-
/// Computes the byte offset that needs to be applied in order to
1437-
/// make the pointer aligned to `align`.
1436+
/// Computes the offset that needs to be applied to the pointer in order to make it aligned to
1437+
/// `align`.
1438+
///
14381439
/// If it is not possible to align the pointer, the implementation returns
14391440
/// `usize::max_value()`.
14401441
///
1441-
/// There are no guarantees whatsover that offsetting the pointer will not
1442-
/// overflow or go beyond the allocation that the pointer points into.
1443-
/// It is up to the caller to ensure that the returned offset is correct
1444-
/// in all terms other than alignment.
1442+
/// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
1443+
/// used with the `offset` or `offset_to` methods.
1444+
///
1445+
/// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
1446+
/// beyond the allocation that the pointer points into. It is up to the caller to ensure that
1447+
/// the returned offset is correct in all terms other than alignment.
1448+
///
1449+
/// # Panics
1450+
///
1451+
/// The function panics if `align` is not a power-of-two.
14451452
///
14461453
/// # Examples
14471454
///
@@ -1465,13 +1472,30 @@ impl<T: ?Sized> *const T {
14651472
/// # } }
14661473
/// ```
14671474
#[unstable(feature = "align_offset", issue = "44488")]
1468-
pub fn align_offset(self, align: usize) -> usize {
1475+
#[cfg(not(stage0))]
1476+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
1477+
if !align.is_power_of_two() {
1478+
panic!("align_offset: align is not a power-of-two");
1479+
}
14691480
unsafe {
1470-
intrinsics::align_offset(self as *const _, align)
1481+
intrinsics::align_offset(self, align)
1482+
}
1483+
}
1484+
1485+
/// Stage0 variant of `align_offset`: forwards to the intrinsic that takes an untyped pointer.
1486+
#[unstable(feature = "align_offset", issue = "44488")]
1487+
#[cfg(stage0)]
1488+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
1489+
if !align.is_power_of_two() {
1490+
panic!("align_offset: align is not a power-of-two");
1491+
}
1492+
unsafe {
1493+
intrinsics::align_offset(self as *const (), align)
14711494
}
14721495
}
14731496
}
14741497

1498+
14751499
#[lang = "mut_ptr"]
14761500
impl<T: ?Sized> *mut T {
14771501
/// Returns `true` if the pointer is null.
@@ -1804,44 +1828,6 @@ impl<T: ?Sized> *mut T {
18041828
(self as *const T).wrapping_offset_from(origin)
18051829
}
18061830

1807-
/// Computes the byte offset that needs to be applied in order to
1808-
/// make the pointer aligned to `align`.
1809-
/// If it is not possible to align the pointer, the implementation returns
1810-
/// `usize::max_value()`.
1811-
///
1812-
/// There are no guarantees whatsover that offsetting the pointer will not
1813-
/// overflow or go beyond the allocation that the pointer points into.
1814-
/// It is up to the caller to ensure that the returned offset is correct
1815-
/// in all terms other than alignment.
1816-
///
1817-
/// # Examples
1818-
///
1819-
/// Accessing adjacent `u8` as `u16`
1820-
///
1821-
/// ```
1822-
/// # #![feature(align_offset)]
1823-
/// # fn foo(n: usize) {
1824-
/// # use std::mem::align_of;
1825-
/// # unsafe {
1826-
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
1827-
/// let ptr = &x[n] as *const u8;
1828-
/// let offset = ptr.align_offset(align_of::<u16>());
1829-
/// if offset < x.len() - n - 1 {
1830-
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
1831-
/// assert_ne!(*u16_ptr, 500);
1832-
/// } else {
1833-
/// // while the pointer can be aligned via `offset`, it would point
1834-
/// // outside the allocation
1835-
/// }
1836-
/// # } }
1837-
/// ```
1838-
#[unstable(feature = "align_offset", issue = "44488")]
1839-
pub fn align_offset(self, align: usize) -> usize {
1840-
unsafe {
1841-
intrinsics::align_offset(self as *const _, align)
1842-
}
1843-
}
1844-
18451831
/// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
18461832
///
18471833
/// `count` is in units of T; e.g. a `count` of 3 represents a pointer
@@ -2511,8 +2497,189 @@ impl<T: ?Sized> *mut T {
25112497
{
25122498
swap(self, with)
25132499
}
2500+
2501+
/// Computes the offset that needs to be applied to the pointer in order to make it aligned to
2502+
/// `align`.
2503+
///
2504+
/// If it is not possible to align the pointer, the implementation returns
2505+
/// `usize::max_value()`.
2506+
///
2507+
/// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
2508+
/// used with the `offset` or `offset_to` methods.
2509+
///
2510+
/// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
2511+
/// beyond the allocation that the pointer points into. It is up to the caller to ensure that
2512+
/// the returned offset is correct in all terms other than alignment.
2513+
///
2514+
/// # Panics
2515+
///
2516+
/// The function panics if `align` is not a power-of-two.
2517+
///
2518+
/// # Examples
2519+
///
2520+
/// Accessing adjacent `u8` as `u16`
2521+
///
2522+
/// ```
2523+
/// # #![feature(align_offset)]
2524+
/// # fn foo(n: usize) {
2525+
/// # use std::mem::align_of;
2526+
/// # unsafe {
2527+
/// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
2528+
/// let ptr = &x[n] as *const u8;
2529+
/// let offset = ptr.align_offset(align_of::<u16>());
2530+
/// if offset < x.len() - n - 1 {
2531+
/// let u16_ptr = ptr.offset(offset as isize) as *const u16;
2532+
/// assert_ne!(*u16_ptr, 500);
2533+
/// } else {
2534+
/// // while the pointer can be aligned via `offset`, it would point
2535+
/// // outside the allocation
2536+
/// }
2537+
/// # } }
2538+
/// ```
2539+
#[unstable(feature = "align_offset", issue = "44488")]
2540+
#[cfg(not(stage0))]
2541+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
2542+
if !align.is_power_of_two() {
2543+
panic!("align_offset: align is not a power-of-two");
2544+
}
2545+
unsafe {
2546+
intrinsics::align_offset(self, align)
2547+
}
2548+
}
2549+
2550+
/// Stage0 variant of `align_offset`: forwards to the intrinsic that takes an untyped pointer.
2551+
#[unstable(feature = "align_offset", issue = "44488")]
2552+
#[cfg(stage0)]
2553+
pub fn align_offset(self, align: usize) -> usize where T: Sized {
2554+
if !align.is_power_of_two() {
2555+
panic!("align_offset: align is not a power-of-two");
2556+
}
2557+
unsafe {
2558+
intrinsics::align_offset(self as *const (), align)
2559+
}
2560+
}
25142561
}
25152562

2563+
/// Align pointer `p`.
2564+
///
2565+
/// Calculate the offset (in units of `stride`-byte elements) that has to be applied
2566+
/// to pointer `p` so that pointer `p` would get aligned to `a`.
2567+
///
2568+
/// This is an implementation of the `align_offset` intrinsic for the case where `stride > 1`.
2569+
///
2570+
/// Note: This implementation has been carefully tailored to not panic. It is UB for this to panic.
2571+
/// The only real change that can be made here is change of `INV_TABLE_MOD_16` and associated
2572+
/// constants.
2573+
///
2574+
/// If we ever decide to make it possible to call the intrinsic with `a` that is not a
2575+
/// power-of-two, it will probably be more prudent to just change to a naive implementation rather
2576+
/// than trying to adapt this to accommodate that change.
2577+
///
2578+
/// Any questions go to @nagisa.
2579+
#[lang="align_offset"]
2580+
#[cfg(not(stage0))]
2581+
unsafe fn align_offset(p: *const (), a: usize, stride: usize) -> usize {
2582+
/// Calculate multiplicative modular inverse of `x` modulo `m`.
2583+
///
2584+
/// This implementation is tailored for align_offset and has following preconditions:
2585+
///
2586+
/// * `m` is a power-of-two;
2587+
/// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
2588+
///
2589+
/// Implementation of this function shall not panic. Ever.
2590+
fn mod_inv(x: usize, m: usize) -> usize {
2591+
/// Multiplicative modular inverse table modulo 2⁴ = 16.
2592+
///
2593+
/// Note, that this table does not contain values where inverse does not exist (i.e. for
2594+
/// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
2595+
static INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
2596+
/// Modulo for which the `INV_TABLE_MOD_16` is intended.
2597+
const INV_TABLE_MOD: usize = 16;
2598+
/// INV_TABLE_MOD²
2599+
const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD;
2600+
2601+
let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1];
2602+
if m <= INV_TABLE_MOD {
2603+
return table_inverse & (m - 1);
2604+
} else {
2605+
// We iterate "up" using the following formula:
2606+
//
2607+
// $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$
2608+
//
2609+
// until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`.
2610+
let mut inverse = table_inverse;
2611+
let mut going_mod = INV_TABLE_MOD_SQUARED;
2612+
loop {
2613+
// y = y * (2 - xy) mod n
2614+
//
2615+
// Note, that we use wrapping operations here intentionally – the original formula
2616+
// uses e.g. subtraction `mod n`. It is entirely fine to do them `mod
2617+
// (usize::max_value() + 1)` instead, because we take the result `mod n` at the end
2618+
// anyway.
2619+
inverse = inverse.wrapping_mul(
2620+
2usize.wrapping_sub(x.wrapping_mul(inverse))
2621+
) & (going_mod - 1);
2622+
if going_mod > m {
2623+
return inverse & (m - 1);
2624+
}
2625+
going_mod = going_mod.wrapping_mul(going_mod);
2626+
}
2627+
}
2628+
}
2629+
2630+
let a_minus_one = a.wrapping_sub(1);
2631+
let pmoda = p as usize & a_minus_one;
2632+
let smoda = stride & a_minus_one;
2633+
// a is power-of-two so cannot be 0. stride = 0 is handled by the intrinsic.
2634+
let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
2635+
let gcd = 1usize << gcdpow;
2636+
2637+
if pmoda == 0 {
2638+
// Already aligned. Yay!
2639+
return 0;
2640+
}
2641+
2642+
if gcd == 1 {
2643+
// This branch solves for the variable $o$ in following linear congruence equation:
2644+
//
2645+
// ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to specified alignment $a$
2646+
// ⎱ o ≡ 0 (mod s) # offset $o$ must be a multiple of stride $s$
2647+
//
2648+
// where
2649+
//
2650+
// * a, s are co-prime
2651+
//
2652+
// This gives us the formula below:
2653+
//
2654+
// o = (a - (p mod a)) * (s⁻¹ mod a) * s
2655+
//
2656+
// The first term is “the relative alignment of p to a”, the second term is “how does
2657+
// incrementing p by one s change the relative alignment of p”, the third term is
2658+
// translating change in units of s to a byte count.
2659+
//
2660+
// Furthermore, the result produced by this solution is not “minimal”, so it is necessary
2661+
// to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
2662+
// 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just a $s * a$.
2663+
//
2664+
// (Author note: we decided later on to express the offset in "elements" rather than bytes,
2665+
// which drops the multiplication by `s` on both sides of the modulo.)
2666+
return intrinsics::unchecked_rem(a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)), a);
2667+
}
2668+
2669+
if p as usize & (gcd - 1) == 0 {
2670+
// This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
2671+
// formula is used.
2672+
let j = a.wrapping_sub(pmoda) >> gcdpow;
2673+
let k = smoda >> gcdpow;
2674+
return intrinsics::unchecked_rem(j.wrapping_mul(mod_inv(k, a)), a >> gcdpow);
2675+
}
2676+
2677+
// Cannot be aligned at all.
2678+
return usize::max_value();
2679+
}
2680+
2681+
2682+
25162683
// Equality for pointers
25172684
#[stable(feature = "rust1", since = "1.0.0")]
25182685
impl<T: ?Sized> PartialEq for *const T {

src/libcore/slice/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1696,6 +1696,28 @@ impl<T> [T] {
16961696
self.as_mut_ptr(), other.as_mut_ptr(), self.len());
16971697
}
16981698
}
1699+
1700+
// #[unstable(feature = "slice_align_to", issue = "44488")]
1701+
// pub fn align_to<U>(&self) -> (&[T], &[U], &[T]) {
1702+
// // First, find at what point do we split between the first and 2nd slice.
1703+
// let x = self.as_ptr();
1704+
// let offset = x.align_offset(::mem::align_of::<U>());
1705+
// if offset > x * ::mem::size_of::<T>() {
1706+
// return (self, [], []);
1707+
// }
1708+
1709+
// }
1710+
1711+
// #[unstable(feature = "slice_align_to", issue = "44488")]
1712+
// pub fn align_to_mut<U>(&mut self) -> (&mut [T], &mut [U], &mut [T]) {
1713+
// }
1714+
}}
1715+
1716+
#[lang = "slice"]
1717+
#[cfg(not(test))]
1718+
#[cfg(not(stage0))]
1719+
impl<T> [T] {
1720+
slice_core_methods!();
16991721
}
17001722

17011723
#[lang = "slice_u8"]

src/librustc/middle/lang_items.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,9 @@ language_item_table! {
348348
I128ShroFnLangItem, "i128_shro", i128_shro_fn;
349349
U128ShroFnLangItem, "u128_shro", u128_shro_fn;
350350

351+
// Align offset for stride != 1, must not panic.
352+
AlignOffsetLangItem, "align_offset", align_offset_fn;
353+
351354
TerminationTraitLangItem, "termination", termination;
352355
}
353356

0 commit comments

Comments
 (0)