@@ -151,8 +151,7 @@ pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
151
151
/// Then zero elements according to `imm8`.
152
152
///
153
153
/// `imm8` specifies which bits from operand `a` will be copied, which bits in
154
- /// the
155
- /// result they will be copied to, and which bits in the result will be
154
+ /// the result they will be copied to, and which bits in the result will be
156
155
/// cleared. The following assignments are made:
157
156
///
158
157
/// * Bits `[7:6]` specify the bits to copy from operand `a`:
@@ -375,14 +374,14 @@ pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
375
374
376
375
/// Tests whether the specified bits in a 128-bit integer vector are all
377
376
/// zeros.
378
- ///
377
+ ///
379
378
/// Arguments:
380
- ///
379
+ ///
381
380
/// * `a` - A 128-bit integer vector containing the bits to be tested.
382
381
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
383
- ///
382
+ ///
384
383
/// Returns:
385
- ///
384
+ ///
386
385
/// * `1` - if the specified bits are all zeros,
387
386
/// * `0` - otherwise.
388
387
#[ inline( always) ]
@@ -397,12 +396,12 @@ pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
397
396
/// ones.
398
397
///
399
398
/// Arguments:
400
- ///
399
+ ///
401
400
/// * `a` - A 128-bit integer vector containing the bits to be tested.
402
401
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
403
- ///
402
+ ///
404
403
/// Returns:
405
- ///
404
+ ///
406
405
/// * `1` - if the specified bits are all ones,
407
406
/// * `0` - otherwise.
408
407
#[ inline( always) ]
@@ -416,12 +415,12 @@ pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
416
415
/// neither all zeros nor all ones.
417
416
///
418
417
/// Arguments:
419
- ///
418
+ ///
420
419
/// * `a` - A 128-bit integer vector containing the bits to be tested.
421
420
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
422
- ///
421
+ ///
423
422
/// Returns:
424
- ///
423
+ ///
425
424
/// * `1` - if the specified bits are neither all zeros nor all ones,
426
425
/// * `0` - otherwise.
427
426
#[ inline( always) ]
@@ -433,14 +432,14 @@ pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
433
432
434
433
/// Tests whether the specified bits in a 128-bit integer vector are all
435
434
/// zeros.
436
- ///
435
+ ///
437
436
/// Arguments:
438
- ///
437
+ ///
439
438
/// * `a` - A 128-bit integer vector containing the bits to be tested.
440
439
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
441
- ///
440
+ ///
442
441
/// Returns:
443
- ///
442
+ ///
444
443
/// * `1` - if the specified bits are all zeros,
445
444
/// * `0` - otherwise.
446
445
#[ inline( always) ]
@@ -452,13 +451,13 @@ pub unsafe fn _mm_test_all_zeros(a: i64x2, mask: i64x2) -> i32 {
452
451
453
452
/// Tests whether the specified bits in a 128-bit integer vector are all
454
453
/// ones.
455
- ///
454
+ ///
456
455
/// Argument:
457
- ///
456
+ ///
458
457
/// * `a` - A 128-bit integer vector containing the bits to be tested.
459
- ///
458
+ ///
460
459
/// Returns:
461
- ///
460
+ ///
462
461
/// * `1` - if the bits specified in the operand are all set to 1,
463
462
/// * `0` - otherwise.
464
463
#[ inline( always) ]
@@ -473,12 +472,12 @@ pub unsafe fn _mm_test_all_ones(a: i64x2) -> i32 {
473
472
/// neither all zeros nor all ones.
474
473
///
475
474
/// Arguments:
476
- ///
475
+ ///
477
476
/// * `a` - A 128-bit integer vector containing the bits to be tested.
478
477
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
479
- ///
478
+ ///
480
479
/// Returns:
481
- ///
480
+ ///
482
481
/// * `1` - if the specified bits are neither all zeros nor all ones,
483
482
/// * `0` - otherwise.
484
483
#[ inline( always) ]
@@ -731,22 +730,17 @@ pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 {
731
730
}
732
731
733
732
/// Finds the minimum unsigned 16-bit element in the input 128-bit
734
- /// vector of [8 x u16] and returns it and along with its index.
735
- ///
736
- /// \headerfile <x86intrin.h>
737
- ///
738
- /// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>
739
- /// instruction.
733
+ /// vector of `u16x8` and returns it along with its index.
740
734
///
741
735
/// Arguments:
742
- ///
736
+ ///
743
737
/// * `a` - A 128-bit vector of type `u16x8`.
744
- ///
738
+ ///
745
739
/// Returns:
746
- ///
740
+ ///
747
741
/// A 128-bit value where:
748
- ///
749
- /// * bits `[15:0]` - contain the minimum value found in parameter `a`,
742
+ ///
743
+ /// * bits `[15:0]` - contain the minimum value found in parameter `a`,
750
744
/// * bits `[18:16]` - contain the index of the minimum value
751
745
/// * remaining bits are set to `0`.
752
746
#[ inline( always) ]
@@ -756,6 +750,47 @@ pub unsafe fn _mm_minpos_epu16(a: u16x8) -> u16x8 {
756
750
phminposuw ( a)
757
751
}
758
752
753
+ /// Subtracts 8-bit unsigned integer values and computes the absolute
754
+ /// values of the differences to the corresponding bits in the destination.
755
+ /// Then sums of the absolute differences are returned according to the bit
756
+ /// fields in the immediate operand.
757
+ ///
758
+ /// The following algorithm is performed:
759
+ ///
760
+ /// ```ignore
761
+ /// i = imm8[2] * 4
762
+ /// j = imm8[1:0] * 4
763
+ /// for k := 0 to 7
764
+ /// d0 = abs(a[i + k + 0] - b[j + 0])
765
+ /// d1 = abs(a[i + k + 1] - b[j + 1])
766
+ /// d2 = abs(a[i + k + 2] - b[j + 2])
767
+ /// d3 = abs(a[i + k + 3] - b[j + 3])
768
+ /// r[k] = d0 + d1 + d2 + d3
769
+ /// ```
770
+ ///
771
+ /// Arguments:
772
+ ///
773
+ /// * `a` - A 128-bit vector of type `i8x16`.
774
+ /// * `b` - A 128-bit vector of type `i8x16`.
775
+ /// * `imm8` - An 8-bit immediate operand specifying how the absolute differences are to
776
+ /// be calculated
777
+ /// * Bit `[2]` specifies the offset for operand `a`
778
+ /// * Bits `[1:0]` specify the offset for operand `b`
779
+ ///
780
+ /// Returns:
781
+ ///
782
+ /// * An `i16x8` vector containing the sums of the sets of
783
+ /// absolute differences between both operands.
784
+ #[ inline( always) ]
785
+ #[ target_feature = "+sse4.1" ]
786
+ #[ cfg_attr( test, assert_instr( mpsadbw, imm8=0 ) ) ]
787
+ pub unsafe fn _mm_mpsadbw_epu8 ( a : i8x16 , b : i8x16 , imm8 : u8 ) -> i16x8 {
788
+ macro_rules! call {
789
+ ( $imm8: expr) => { mpsadbw( a, b, $imm8) }
790
+ }
791
+ constify_imm3 ! ( imm8, call)
792
+ }
793
+
759
794
760
795
#[ allow( improper_ctypes) ]
761
796
extern "C" {
@@ -805,6 +840,8 @@ extern "C" {
805
840
fn roundss ( a : f32x4 , b : f32x4 , rounding : i32 ) -> f32x4 ;
806
841
#[ link_name = "llvm.x86.sse41.phminposuw" ]
807
842
fn phminposuw ( a : u16x8 ) -> u16x8 ;
843
+ #[ link_name = "llvm.x86.sse41.mpsadbw" ]
844
+ fn mpsadbw ( a : i8x16 , b : i8x16 , imm8 : u8 ) -> i16x8 ;
808
845
}
809
846
810
847
#[ cfg( test) ]
@@ -1083,7 +1120,7 @@ mod tests {
1083
1120
let e = i64x2:: splat ( -10 ) ;
1084
1121
assert_eq ! ( r, e) ;
1085
1122
}
1086
-
1123
+
1087
1124
#[ simd_test = "sse4.1" ]
1088
1125
unsafe fn _mm_cvtepi32_epi64 ( ) {
1089
1126
let a = i32x4:: splat ( 10 ) ;
@@ -1393,4 +1430,29 @@ mod tests {
1393
1430
let e = u16x8:: splat ( 0 ) . replace ( 0 , 1 ) . replace ( 1 , 5 ) ;
1394
1431
assert_eq ! ( r, e) ;
1395
1432
}
1433
+
1434
+ #[ simd_test = "sse4.1" ]
1435
+ unsafe fn _mm_mpsadbw_epu8 ( ) {
1436
+ let a = i8x16:: new ( 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 ) ;
1437
+
1438
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b000 ) ;
1439
+ let e = i16x8:: new ( 0 , 4 , 8 , 12 , 16 , 20 , 24 , 28 ) ;
1440
+ assert_eq ! ( r, e) ;
1441
+
1442
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b001 ) ;
1443
+ let e = i16x8:: new ( 16 , 12 , 8 , 4 , 0 , 4 , 8 , 12 ) ;
1444
+ assert_eq ! ( r, e) ;
1445
+
1446
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b100 ) ;
1447
+ let e = i16x8:: new ( 16 , 20 , 24 , 28 , 32 , 36 , 40 , 44 ) ;
1448
+ assert_eq ! ( r, e) ;
1449
+
1450
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b101 ) ;
1451
+ let e = i16x8:: new ( 0 , 4 , 8 , 12 , 16 , 20 , 24 , 28 ) ;
1452
+ assert_eq ! ( r, e) ;
1453
+
1454
+ let r = sse41:: _mm_mpsadbw_epu8 ( a, a, 0b111 ) ;
1455
+ let e = i16x8:: new ( 32 , 28 , 24 , 20 , 16 , 12 , 8 , 4 ) ;
1456
+ assert_eq ! ( r, e) ;
1457
+ }
1396
1458
}
0 commit comments