@@ -151,8 +151,7 @@ pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
151
151
/// Then zero elements according to `imm8`.
152
152
///
153
153
/// `imm8` specifies which bits from operand `a` will be copied, which bits in
154
- /// the
155
- /// result they will be copied to, and which bits in the result will be
154
+ /// the result they will be copied to, and which bits in the result will be
156
155
/// cleared. The following assignments are made:
157
156
///
158
157
/// * Bits `[7:6]` specify the bits to copy from operand `a`:
@@ -413,14 +412,14 @@ pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
413
412
414
413
/// Tests whether the specified bits in a 128-bit integer vector are all
415
414
/// zeros.
416
- ///
415
+ ///
417
416
/// Arguments:
418
- ///
417
+ ///
419
418
/// * `a` - A 128-bit integer vector containing the bits to be tested.
420
419
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
421
- ///
420
+ ///
422
421
/// Returns:
423
- ///
422
+ ///
424
423
/// * `1` - if the specified bits are all zeros,
425
424
/// * `0` - otherwise.
426
425
#[ inline( always) ]
@@ -435,12 +434,12 @@ pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
435
434
/// ones.
436
435
///
437
436
/// Arguments:
438
- ///
437
+ ///
439
438
/// * `a` - A 128-bit integer vector containing the bits to be tested.
440
439
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
441
- ///
440
+ ///
442
441
/// Returns:
443
- ///
442
+ ///
444
443
/// * `1` - if the specified bits are all ones,
445
444
/// * `0` - otherwise.
446
445
#[ inline( always) ]
@@ -454,12 +453,12 @@ pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
454
453
/// neither all zeros nor all ones.
455
454
///
456
455
/// Arguments:
457
- ///
456
+ ///
458
457
/// * `a` - A 128-bit integer vector containing the bits to be tested.
459
458
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
460
- ///
459
+ ///
461
460
/// Returns:
462
- ///
461
+ ///
463
462
/// * `1` - if the specified bits are neither all zeros nor all ones,
464
463
/// * `0` - otherwise.
465
464
#[ inline( always) ]
@@ -471,14 +470,14 @@ pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
471
470
472
471
/// Tests whether the specified bits in a 128-bit integer vector are all
473
472
/// zeros.
474
- ///
473
+ ///
475
474
/// Arguments:
476
- ///
475
+ ///
477
476
/// * `a` - A 128-bit integer vector containing the bits to be tested.
478
477
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
479
- ///
478
+ ///
480
479
/// Returns:
481
- ///
480
+ ///
482
481
/// * `1` - if the specified bits are all zeros,
483
482
/// * `0` - otherwise.
484
483
#[ inline( always) ]
@@ -490,13 +489,13 @@ pub unsafe fn _mm_test_all_zeros(a: i64x2, mask: i64x2) -> i32 {
490
489
491
490
/// Tests whether the specified bits in a 128-bit integer vector are all
492
491
/// ones.
493
- ///
492
+ ///
494
493
/// Argument:
495
- ///
494
+ ///
496
495
/// * `a` - A 128-bit integer vector containing the bits to be tested.
497
- ///
496
+ ///
498
497
/// Returns:
499
- ///
498
+ ///
500
499
/// * `1` - if the bits specified in the operand are all set to 1,
501
500
/// * `0` - otherwise.
502
501
#[ inline( always) ]
@@ -511,12 +510,12 @@ pub unsafe fn _mm_test_all_ones(a: i64x2) -> i32 {
511
510
/// neither all zeros nor all ones.
512
511
///
513
512
/// Arguments:
514
- ///
513
+ ///
515
514
/// * `a` - A 128-bit integer vector containing the bits to be tested.
516
515
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
517
- ///
516
+ ///
518
517
/// Returns:
519
- ///
518
+ ///
520
519
/// * `1` - if the specified bits are neither all zeros nor all ones,
521
520
/// * `0` - otherwise.
522
521
#[ inline( always) ]
@@ -768,11 +767,9 @@ pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 {
768
767
constify_imm4 ! ( rounding, call)
769
768
}
770
769
771
- /// Finds the minimum u16 in the u16x8 vector, returning it in the first
772
- /// position of the result vector along with its index in the second position;
773
- /// all other elements are set to zero.
774
- ///
775
- /// \headerfile <x86intrin.h>
770
+ /// Finds the minimum unsigned 16-bit element in the 128-bit u16x8 vector,
771
+ /// returning a vector containing its value in its first position, and its index
772
+ /// in its second position; all other elements are set to zero.
776
773
///
777
774
/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>
778
775
/// instruction.
@@ -817,6 +814,47 @@ pub unsafe fn _mm_mullo_epi32(a: i32x4, b: i32x4) -> i32x4 {
817
814
a * b
818
815
}
819
816
817
+ /// Subtracts 8-bit unsigned integer values and computes the absolute
818
+ /// values of the differences. Sums of four consecutive absolute
819
+ /// differences are then returned, with the starting offsets into `a` and
820
+ /// `b` selected by the bit fields in the immediate operand.
821
+ ///
822
+ /// The following algorithm is performed:
823
+ ///
824
+ /// ```ignore
825
+ /// i = imm8[2] * 4
826
+ /// j = imm8[1:0] * 4
827
+ /// for k := 0 to 7
828
+ /// d0 = abs(a[i + k + 0] - b[j + 0])
829
+ /// d1 = abs(a[i + k + 1] - b[j + 1])
830
+ /// d2 = abs(a[i + k + 2] - b[j + 2])
831
+ /// d3 = abs(a[i + k + 3] - b[j + 3])
832
+ /// r[k] = d0 + d1 + d2 + d3
833
+ /// ```
834
+ ///
835
+ /// Arguments:
836
+ ///
837
+ /// * `a` - A 128-bit vector of type `i8x16`.
838
+ /// * `b` - A 128-bit vector of type `i8x16`.
839
+ /// * `imm8` - An 8-bit immediate operand specifying how the absolute differences are to
840
+ /// be calculated:
841
+ /// * Bit `[2]` specifies the offset for operand `a`
842
+ /// * Bits `[1:0]` specify the offset for operand `b`
843
+ ///
844
+ /// Returns:
845
+ ///
846
+ /// * An `i16x8` vector containing the sums of the sets of
847
+ /// absolute differences between both operands.
848
+ #[ inline( always) ]
849
+ #[ target_feature = "+sse4.1" ]
850
+ #[ cfg_attr( test, assert_instr( mpsadbw, imm8=0 ) ) ]
851
+ pub unsafe fn _mm_mpsadbw_epu8 ( a : i8x16 , b : i8x16 , imm8 : u8 ) -> i16x8 {
852
+ macro_rules! call {
853
+ ( $imm8: expr) => { mpsadbw( a, b, $imm8) }
854
+ }
855
+ constify_imm3 ! ( imm8, call)
856
+ }
857
+
820
858
#[ allow( improper_ctypes) ]
821
859
extern "C" {
822
860
#[ link_name = "llvm.x86.sse41.pblendvb" ]
@@ -875,6 +913,8 @@ extern "C" {
875
913
fn phminposuw ( a : u16x8 ) -> u16x8 ;
876
914
#[ link_name = "llvm.x86.sse41.pmuldq" ]
877
915
fn pmuldq ( a : i32x4 , b : i32x4 ) -> i64x2 ;
916
+ #[ link_name = "llvm.x86.sse41.mpsadbw" ]
917
+ fn mpsadbw ( a : i8x16 , b : i8x16 , imm8 : u8 ) -> i16x8 ;
878
918
}
879
919
880
920
#[ cfg( test) ]
@@ -1581,4 +1621,29 @@ mod tests {
1581
1621
let e = u16x8:: splat ( 0 ) . replace ( 0 , 1 ) . replace ( 1 , 5 ) ;
1582
1622
assert_eq ! ( r, e) ;
1583
1623
}
1624
+
1625
+ #[ simd_test = "sse4.1" ]
1626
+ unsafe fn _mm_mpsadbw_epu8 ( ) {
1627
+ let a = i8x16:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ) ;
1628
+
1629
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b000 ) ;
1630
+ let e = i16x8:: new ( 0 , 4 , 8 , 12 , 16 , 20 , 24 , 28 ) ;
1631
+ assert_eq ! ( r, e) ;
1632
+
1633
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b001 ) ;
1634
+ let e = i16x8:: new ( 16 , 12 , 8 , 4 , 0 , 4 , 8 , 12 ) ;
1635
+ assert_eq ! ( r, e) ;
1636
+
1637
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b100 ) ;
1638
+ let e = i16x8:: new ( 16 , 20 , 24 , 28 , 32 , 36 , 40 , 44 ) ;
1639
+ assert_eq ! ( r, e) ;
1640
+
1641
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b101 ) ;
1642
+ let e = i16x8:: new ( 0 , 4 , 8 , 12 , 16 , 20 , 24 , 28 ) ;
1643
+ assert_eq ! ( r, e) ;
1644
+
1645
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b111 ) ;
1646
+ let e = i16x8:: new ( 32 , 28 , 24 , 20 , 16 , 12 , 8 , 4 ) ;
1647
+ assert_eq ! ( r, e) ;
1648
+ }
1584
1649
}
0 commit comments