@@ -1433,15 +1433,22 @@ impl<T: ?Sized> *const T {
         copy_nonoverlapping(self, dest, count)
     }

-    /// Computes the byte offset that needs to be applied in order to
-    /// make the pointer aligned to `align`.
+    /// Computes the offset that needs to be applied to the pointer in order to make it aligned to
+    /// `align`.
+    ///
     /// If it is not possible to align the pointer, the implementation returns
     /// `usize::max_value()`.
     ///
-    /// There are no guarantees whatsover that offsetting the pointer will not
-    /// overflow or go beyond the allocation that the pointer points into.
-    /// It is up to the caller to ensure that the returned offset is correct
-    /// in all terms other than alignment.
+    /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
+    /// used with the `offset` or `offset_to` methods.
+    ///
+    /// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
+    /// beyond the allocation that the pointer points into. It is up to the caller to ensure that
+    /// the returned offset is correct in all terms other than alignment.
+    ///
+    /// # Panics
+    ///
+    /// The function panics if `align` is not a power-of-two.
     ///
     /// # Examples
     ///
@@ -1465,13 +1472,30 @@ impl<T: ?Sized> *const T {
     /// # } }
     /// ```
     #[unstable(feature = "align_offset", issue = "44488")]
-    pub fn align_offset(self, align: usize) -> usize {
+    #[cfg(not(stage0))]
+    pub fn align_offset(self, align: usize) -> usize where T: Sized {
+        if !align.is_power_of_two() {
+            panic!("align_offset: align is not a power-of-two");
+        }
         unsafe {
-            intrinsics::align_offset(self as *const _, align)
+            intrinsics::align_offset(self, align)
+        }
+    }
+
+    /// Stage0 twin of `align_offset`; see the `#[cfg(not(stage0))]` variant above for the
+    /// real documentation.
+    #[unstable(feature = "align_offset", issue = "44488")]
+    #[cfg(stage0)]
+    pub fn align_offset(self, align: usize) -> usize where T: Sized {
+        if !align.is_power_of_two() {
+            panic!("align_offset: align is not a power-of-two");
+        }
+        unsafe {
+            intrinsics::align_offset(self as *const (), align)
         }
     }
 }

+
 #[lang = "mut_ptr"]
 impl<T: ?Sized> *mut T {
     /// Returns `true` if the pointer is null.
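As a quick illustration of the new element-based semantics (a minimal standalone sketch, not part of this patch): with the implementation added here, a `*const u16` that sits at an odd address can never be aligned by stepping whole `u16` elements, so `align_offset` reports `usize::max_value()`, while a byte pointer at the same address needs exactly one element:

```
#![feature(align_offset)]
use std::mem::align_of;

fn main() {
    let buf = [0u16; 4]; // 2-byte aligned, so `buf.as_ptr() as usize` is even
    let odd = unsafe { (buf.as_ptr() as *const u8).offset(1) };

    // Stride 2, misaligned by one byte: no number of whole `u16` steps can fix this.
    let p = odd as *const u16;
    assert_eq!(p.align_offset(align_of::<u16>()), usize::max_value());

    // Stride 1: one byte forward reaches an even address.
    assert_eq!(odd.align_offset(align_of::<u16>()), 1);
}
```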
@@ -1804,44 +1828,6 @@ impl<T: ?Sized> *mut T {
         (self as *const T).wrapping_offset_from(origin)
     }

-    /// Computes the byte offset that needs to be applied in order to
-    /// make the pointer aligned to `align`.
-    /// If it is not possible to align the pointer, the implementation returns
-    /// `usize::max_value()`.
-    ///
-    /// There are no guarantees whatsover that offsetting the pointer will not
-    /// overflow or go beyond the allocation that the pointer points into.
-    /// It is up to the caller to ensure that the returned offset is correct
-    /// in all terms other than alignment.
-    ///
-    /// # Examples
-    ///
-    /// Accessing adjacent `u8` as `u16`
-    ///
-    /// ```
-    /// # #![feature(align_offset)]
-    /// # fn foo(n: usize) {
-    /// # use std::mem::align_of;
-    /// # unsafe {
-    /// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
-    /// let ptr = &x[n] as *const u8;
-    /// let offset = ptr.align_offset(align_of::<u16>());
-    /// if offset < x.len() - n - 1 {
-    ///     let u16_ptr = ptr.offset(offset as isize) as *const u16;
-    ///     assert_ne!(*u16_ptr, 500);
-    /// } else {
-    ///     // while the pointer can be aligned via `offset`, it would point
-    ///     // outside the allocation
-    /// }
-    /// # } }
-    /// ```
-    #[unstable(feature = "align_offset", issue = "44488")]
-    pub fn align_offset(self, align: usize) -> usize {
-        unsafe {
-            intrinsics::align_offset(self as *const _, align)
-        }
-    }
-
     /// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
     ///
     /// `count` is in units of T; e.g. a `count` of 3 represents a pointer
@@ -2511,8 +2497,189 @@ impl<T: ?Sized> *mut T {
     {
         swap(self, with)
     }
+
+    /// Computes the offset that needs to be applied to the pointer in order to make it aligned to
+    /// `align`.
+    ///
+    /// If it is not possible to align the pointer, the implementation returns
+    /// `usize::max_value()`.
+    ///
+    /// The offset is expressed in number of `T` elements, and not bytes. The value returned can be
+    /// used with the `offset` or `offset_to` methods.
+    ///
+    /// There are no guarantees whatsoever that offsetting the pointer will not overflow or go
+    /// beyond the allocation that the pointer points into. It is up to the caller to ensure that
+    /// the returned offset is correct in all terms other than alignment.
+    ///
+    /// # Panics
+    ///
+    /// The function panics if `align` is not a power-of-two.
+    ///
+    /// # Examples
+    ///
+    /// Accessing adjacent `u8` as `u16`
+    ///
+    /// ```
+    /// # #![feature(align_offset)]
+    /// # fn foo(n: usize) {
+    /// # use std::mem::align_of;
+    /// # unsafe {
+    /// let x = [5u8, 6u8, 7u8, 8u8, 9u8];
+    /// let ptr = &x[n] as *const u8;
+    /// let offset = ptr.align_offset(align_of::<u16>());
+    /// if offset < x.len() - n - 1 {
+    ///     let u16_ptr = ptr.offset(offset as isize) as *const u16;
+    ///     assert_ne!(*u16_ptr, 500);
+    /// } else {
+    ///     // while the pointer can be aligned via `offset`, it would point
+    ///     // outside the allocation
+    /// }
+    /// # } }
+    /// ```
+    #[unstable(feature = "align_offset", issue = "44488")]
+    #[cfg(not(stage0))]
+    pub fn align_offset(self, align: usize) -> usize where T: Sized {
+        if !align.is_power_of_two() {
+            panic!("align_offset: align is not a power-of-two");
+        }
+        unsafe {
+            intrinsics::align_offset(self, align)
+        }
+    }
+
+    /// Stage0 twin of `align_offset`; see the `#[cfg(not(stage0))]` variant above for the
+    /// real documentation.
+    #[unstable(feature = "align_offset", issue = "44488")]
+    #[cfg(stage0)]
+    pub fn align_offset(self, align: usize) -> usize where T: Sized {
+        if !align.is_power_of_two() {
+            panic!("align_offset: align is not a power-of-two");
+        }
+        unsafe {
+            intrinsics::align_offset(self as *const (), align)
+        }
+    }
 }

+/// Align pointer `p`.
+///
+/// Calculate the offset (in terms of elements of size `stride`) that has to be applied
+/// to pointer `p` so that pointer `p` would get aligned to `a`.
+///
+/// This is an implementation of the `align_offset` intrinsic for the case where `stride > 1`.
+///
+/// Note: This implementation has been carefully tailored to not panic. It is UB for this to panic.
+/// The only real change that can be made here is a change of `INV_TABLE_MOD_16` and associated
+/// constants.
+///
+/// If we ever decide to make it possible to call the intrinsic with `a` that is not a
+/// power-of-two, it will probably be more prudent to just change to a naive implementation rather
+/// than trying to adapt this to accommodate that change.
+///
+/// Any questions go to @nagisa.
+#[lang = "align_offset"]
+#[cfg(not(stage0))]
+unsafe fn align_offset(p: *const (), a: usize, stride: usize) -> usize {
+    /// Calculate the multiplicative modular inverse of `x` modulo `m`.
+    ///
+    /// This implementation is tailored for align_offset and has the following preconditions:
+    ///
+    /// * `m` is a power-of-two;
+    /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
+    ///
+    /// Implementation of this function shall not panic. Ever.
+    fn mod_inv(x: usize, m: usize) -> usize {
+        /// Multiplicative modular inverse table modulo 2⁴ = 16.
+        ///
+        /// Note that this table does not contain values where the inverse does not exist (i.e.
+        /// for `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.)
+        static INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
+        /// Modulo for which the `INV_TABLE_MOD_16` is intended.
+        const INV_TABLE_MOD: usize = 16;
+        /// INV_TABLE_MOD²
+        const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD;
+
+        let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1];
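+        // (`x` must also be odd for an inverse to exist modulo a power of two;
+        // `(x & 15) >> 1` then maps the odd residues 1, 3, 5, …, 15 onto indices 0 through 7.)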
+        if m <= INV_TABLE_MOD {
+            return table_inverse & (m - 1);
+        } else {
+            // We iterate "up" using the following formula:
+            //
+            // $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$
+            //
+            // until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`.
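+            //
+            // (Why this lifts: if xy = 1 + k·2ⁿ, then xy·(2 − xy) = (1 + k·2ⁿ)(1 − k·2ⁿ)
+            // = 1 − k²·2²ⁿ ≡ 1 (mod 2²ⁿ), so each round doubles the number of valid low bits.)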
+            let mut inverse = table_inverse;
+            let mut going_mod = INV_TABLE_MOD_SQUARED;
+            loop {
+                // y = y * (2 - xy) mod n
+                //
+                // Note that we use wrapping operations here intentionally – the original formula
+                // uses e.g. subtraction `mod n`. It is entirely fine to do them `mod
+                // usize::max_value()` instead, because we take the result `mod n` at the end
+                // anyway.
+                inverse = inverse.wrapping_mul(
+                    2usize.wrapping_sub(x.wrapping_mul(inverse))
+                ) & (going_mod - 1);
+                if going_mod > m {
+                    return inverse & (m - 1);
+                }
+                going_mod = going_mod.wrapping_mul(going_mod);
+            }
+        }
+    }
+
+    let a_minus_one = a.wrapping_sub(1);
+    let pmoda = p as usize & a_minus_one;
+    let smoda = stride & a_minus_one;
+    // a is power-of-two so cannot be 0. stride = 0 is handled by the intrinsic.
+    let gcdpow = intrinsics::cttz_nonzero(stride).min(intrinsics::cttz_nonzero(a));
+    let gcd = 1usize << gcdpow;
+
+    if pmoda == 0 {
+        // Already aligned. Yay!
+        return 0;
+    }
+
+    if gcd == 1 {
+        // This branch solves for the variable $o$ in the following linear congruence equation:
+        //
+        // ⎰ p + o ≡ 0 (mod a) # $p + o$ must be aligned to the specified alignment $a$
+        // ⎱ o ≡ 0 (mod s)     # the offset $o$ must be a multiple of the stride $s$
+        //
+        // where
+        //
+        // * a, s are co-prime
+        //
+        // This gives us the formula below:
+        //
+        // o = (a - (p mod a)) * (s⁻¹ mod a) * s
+        //
+        // The first term is “the relative alignment of p to a”, the second term is “how does
+        // incrementing p by one s change the relative alignment of p”, and the third term
+        // translates the change in units of s into a byte count.
+        //
+        // Furthermore, the result produced by this solution is not “minimal”, so it is necessary
+        // to take the result $o mod lcm(s, a)$. Since $s$ and $a$ are co-prime (i.e. $gcd(s, a) =
+        // 1$) and $lcm(s, a) = s * a / gcd(s, a)$, we can replace $lcm(s, a)$ with just $s * a$.
+        //
+        // (Author note: we decided later on to express the offset in "elements" rather than bytes,
+        // which drops the multiplication by `s` on both sides of the modulo.)
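+        //
+        // (Worked example: a = 4, p mod a = 3, s = 3: s⁻¹ ≡ 3 (mod 4), so
+        // o = (4 − 3)·3 mod 4 = 3; indeed (3 + 3·3) mod 4 = 12 mod 4 = 0.)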
+        return intrinsics::unchecked_rem(a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)), a);
+    }
+
+    if p as usize & (gcd - 1) == 0 {
+        // This can be aligned, but `a` and `stride` are not co-prime, so a somewhat adapted
+        // formula is used.
+        let j = a.wrapping_sub(pmoda) >> gcdpow;
+        let k = smoda >> gcdpow;
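+        // (Worked example: a = 8, stride = 2, p ≡ 2 (mod 8): gcd = 2, j = 3, k = 1,
+        // so the result is 3·1 mod 4 = 3; indeed (2 + 3·2) mod 8 = 0.)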
+        return intrinsics::unchecked_rem(j.wrapping_mul(mod_inv(k, a)), a >> gcdpow);
+    }
+
+    // Cannot be aligned at all.
+    return usize::max_value();
+}
+
+
+
 // Equality for pointers
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<T: ?Sized> PartialEq for *const T {
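To sanity-check the modular arithmetic, the algorithm can be mirrored with stable stand-ins (`trailing_zeros` for `cttz_nonzero`, plain `%` for `unchecked_rem`) and compared against a brute-force search over small inputs. This is an illustrative sketch only, not part of the patch:

```
/// Multiplicative modular inverse of `x` (odd) modulo `m` (a power of two).
fn mod_inv(x: usize, m: usize) -> usize {
    static INV_TABLE_MOD_16: [usize; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
    let table_inverse = INV_TABLE_MOD_16[(x & 15) >> 1];
    if m <= 16 {
        return table_inverse & (m - 1);
    }
    let mut inverse = table_inverse;
    let mut going_mod = 256; // 16², as in the patch
    loop {
        // Hensel lift: double the number of correct low bits each round.
        inverse = inverse.wrapping_mul(2usize.wrapping_sub(x.wrapping_mul(inverse)))
            & (going_mod - 1);
        if going_mod > m {
            return inverse & (m - 1);
        }
        going_mod = going_mod.wrapping_mul(going_mod);
    }
}

/// Element offset that aligns address `p` to `a`, stepping `stride` bytes at a time.
fn align_offset(p: usize, a: usize, stride: usize) -> usize {
    let pmoda = p & (a - 1);
    let smoda = stride & (a - 1);
    let gcdpow = stride.trailing_zeros().min(a.trailing_zeros());
    let gcd = 1usize << gcdpow;
    if pmoda == 0 {
        return 0; // already aligned
    }
    if gcd == 1 {
        return a.wrapping_sub(pmoda).wrapping_mul(mod_inv(smoda, a)) % a;
    }
    if p & (gcd - 1) == 0 {
        let j = a.wrapping_sub(pmoda) >> gcdpow;
        let k = smoda >> gcdpow;
        return j.wrapping_mul(mod_inv(k, a)) % (a >> gcdpow);
    }
    usize::max_value() // cannot be aligned at all
}

fn main() {
    for &a in &[2usize, 4, 8, 16, 32] {
        for stride in 1..9 {
            for p in 1..50 {
                // Brute force: smallest o with (p + o·stride) divisible by a, if any.
                let brute = (0..a).find(|&o| (p + o * stride) % a == 0);
                match brute {
                    Some(o) => assert_eq!(align_offset(p, a, stride), o),
                    None => assert_eq!(align_offset(p, a, stride), usize::max_value()),
                }
            }
        }
    }
    println!("formula agrees with brute force");
}
```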