Skip to content

Commit 6b718d9

Browse files
p32blo authored and gnzlbg committed
Add _mm_mpsadbw_epu8
1 parent f7f2a23 commit 6b718d9

File tree

2 files changed

+109
-28
lines changed

2 files changed

+109
-28
lines changed

src/x86/macros.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,22 @@ macro_rules! constify_imm4 {
328328
}
329329
}
330330

331+
macro_rules! constify_imm3 {
332+
($imm8:expr, $expand:ident) => {
333+
#[allow(overflowing_literals)]
334+
match $imm8 & 0b111 {
335+
0 => $expand!(0),
336+
1 => $expand!(1),
337+
2 => $expand!(2),
338+
3 => $expand!(3),
339+
4 => $expand!(4),
340+
5 => $expand!(5),
341+
6 => $expand!(6),
342+
_ => $expand!(7),
343+
}
344+
}
345+
}
346+
331347
macro_rules! constify_imm2 {
332348
($imm8:expr, $expand:ident) => {
333349
#[allow(overflowing_literals)]

src/x86/sse41.rs

Lines changed: 93 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,7 @@ pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
151151
/// Then zero elements according to `imm8`.
152152
///
153153
/// `imm8` specifies which bits from operand `a` will be copied, which bits in
154-
/// the
155-
/// result they will be copied to, and which bits in the result will be
154+
/// the result they will be copied to, and which bits in the result will be
156155
/// cleared. The following assignments are made:
157156
///
158157
/// * Bits `[7:6]` specify the bits to copy from operand `a`:
@@ -413,14 +412,14 @@ pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
413412

414413
/// Tests whether the specified bits in a 128-bit integer vector are all
415414
/// zeros.
416-
///
415+
///
417416
/// Arguments:
418-
///
417+
///
419418
/// * `a` - A 128-bit integer vector containing the bits to be tested.
420419
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
421-
///
420+
///
422421
/// Returns:
423-
///
422+
///
424423
/// * `1` - if the specified bits are all zeros,
425424
/// * `0` - otherwise.
426425
#[inline(always)]
@@ -435,12 +434,12 @@ pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
435434
/// ones.
436435
///
437436
/// Arguments:
438-
///
437+
///
439438
/// * `a` - A 128-bit integer vector containing the bits to be tested.
440439
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
441-
///
440+
///
442441
/// Returns:
443-
///
442+
///
444443
/// * `1` - if the specified bits are all ones,
445444
/// * `0` - otherwise.
446445
#[inline(always)]
@@ -454,12 +453,12 @@ pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
454453
/// neither all zeros nor all ones.
455454
///
456455
/// Arguments:
457-
///
456+
///
458457
/// * `a` - A 128-bit integer vector containing the bits to be tested.
459458
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
460-
///
459+
///
461460
/// Returns:
462-
///
461+
///
463462
/// * `1` - if the specified bits are neither all zeros nor all ones,
464463
/// * `0` - otherwise.
465464
#[inline(always)]
@@ -471,14 +470,14 @@ pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
471470

472471
/// Tests whether the specified bits in a 128-bit integer vector are all
473472
/// zeros.
474-
///
473+
///
475474
/// Arguments:
476-
///
475+
///
477476
/// * `a` - A 128-bit integer vector containing the bits to be tested.
478477
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
479-
///
478+
///
480479
/// Returns:
481-
///
480+
///
482481
/// * `1` - if the specified bits are all zeros,
483482
/// * `0` - otherwise.
484483
#[inline(always)]
@@ -490,13 +489,13 @@ pub unsafe fn _mm_test_all_zeros(a: i64x2, mask: i64x2) -> i32 {
490489

491490
/// Tests whether the specified bits in a 128-bit integer vector are all
492491
/// ones.
493-
///
492+
///
494493
/// Argument:
495-
///
494+
///
496495
/// * `a` - A 128-bit integer vector containing the bits to be tested.
497-
///
496+
///
498497
/// Returns:
499-
///
498+
///
500499
/// * `1` - if the bits specified in the operand are all set to 1,
501500
/// * `0` - otherwise.
502501
#[inline(always)]
@@ -511,12 +510,12 @@ pub unsafe fn _mm_test_all_ones(a: i64x2) -> i32 {
511510
/// neither all zeros nor all ones.
512511
///
513512
/// Arguments:
514-
///
513+
///
515514
/// * `a` - A 128-bit integer vector containing the bits to be tested.
516515
/// * `mask` - A 128-bit integer vector selecting which bits to test in operand `a`.
517-
///
516+
///
518517
/// Returns:
519-
///
518+
///
520519
/// * `1` - if the specified bits are neither all zeros nor all ones,
521520
/// * `0` - otherwise.
522521
#[inline(always)]
@@ -768,11 +767,9 @@ pub unsafe fn _mm_round_ss(a: f32x4, b: f32x4, rounding: i32) -> f32x4 {
768767
constify_imm4!(rounding, call)
769768
}
770769

771-
/// Finds the minimum u16 in the u16x8 vector, returning it in the first
772-
/// position of the result vector along with its index in the second position;
773-
/// all other elements are set to zero.
774-
///
775-
/// \headerfile <x86intrin.h>
770+
/// Finds the minimum unsigned 16-bit element in the 128-bit u16x8 vector,
771+
/// returning a vector containing its value in its first position, and its index
772+
/// in its second position; all other elements are set to zero.
776773
///
777774
/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
778775
/// instruction.
@@ -817,6 +814,47 @@ pub unsafe fn _mm_mullo_epi32(a: i32x4, b: i32x4) -> i32x4 {
817814
a * b
818815
}
819816

817+
/// Subtracts 8-bit unsigned integer values and computes the absolute
818+
/// values of the differences to the corresponding bits in the destination.
819+
/// The sums of the absolute differences are then returned according to the bit
820+
/// fields in the immediate operand.
821+
///
822+
/// The following algorithm is performed:
823+
///
824+
/// ```ignore
825+
/// i = imm8[2] * 4
826+
/// j = imm8[1:0] * 4
827+
/// for k := 0 to 7
828+
/// d0 = abs(a[i + k + 0] - b[j + 0])
829+
/// d1 = abs(a[i + k + 1] - b[j + 1])
830+
/// d2 = abs(a[i + k + 2] - b[j + 2])
831+
/// d3 = abs(a[i + k + 3] - b[j + 3])
832+
/// r[k] = d0 + d1 + d2 + d3
833+
/// ```
834+
///
835+
/// Arguments:
836+
///
837+
/// * `a` - A 128-bit vector of type `i8x16`.
838+
/// * `b` - A 128-bit vector of type `i8x16`.
839+
/// * `imm8` - An 8-bit immediate operand specifying how the absolute differences are to
840+
/// be calculated
841+
/// * Bit `[2]` specifies the offset for operand `a`
842+
/// * Bits `[1:0]` specify the offset for operand `b`
843+
///
844+
/// Returns:
845+
///
846+
/// * An `i16x8` vector containing the sums of the sets of
847+
/// absolute differences between both operands.
848+
#[inline(always)]
849+
#[target_feature = "+sse4.1"]
850+
#[cfg_attr(test, assert_instr(mpsadbw, imm8=0))]
851+
pub unsafe fn _mm_mpsadbw_epu8(a: i8x16, b: i8x16, imm8: u8) -> i16x8 {
852+
macro_rules! call {
853+
($imm8:expr) => { mpsadbw(a, b, $imm8) }
854+
}
855+
constify_imm3!(imm8, call)
856+
}
857+
820858
#[allow(improper_ctypes)]
821859
extern "C" {
822860
#[link_name = "llvm.x86.sse41.pblendvb"]
@@ -875,6 +913,8 @@ extern "C" {
875913
fn phminposuw(a: u16x8) -> u16x8;
876914
#[link_name = "llvm.x86.sse41.pmuldq"]
877915
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
916+
#[link_name = "llvm.x86.sse41.mpsadbw"]
917+
fn mpsadbw(a: i8x16, b: i8x16, imm8: u8) -> i16x8;
878918
}
879919

880920
#[cfg(test)]
@@ -1581,4 +1621,29 @@ mod tests {
15811621
let e = u16x8::splat(0).replace(0, 1).replace(1, 5);
15821622
assert_eq!(r, e);
15831623
}
1624+
1625+
#[simd_test = "sse4.1"]
1626+
unsafe fn _mm_mpsadbw_epu8() {
1627+
let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1628+
1629+
let r = sse41::_mm_mpsadbw_epu8(a, a, 0b000);
1630+
let e = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
1631+
assert_eq!(r, e);
1632+
1633+
let r = sse41::_mm_mpsadbw_epu8(a, a, 0b001);
1634+
let e = i16x8::new(16, 12, 8, 4, 0, 4, 8, 12);
1635+
assert_eq!(r, e);
1636+
1637+
let r = sse41::_mm_mpsadbw_epu8(a, a, 0b100);
1638+
let e = i16x8::new(16, 20, 24, 28, 32, 36, 40, 44);
1639+
assert_eq!(r, e);
1640+
1641+
let r = sse41::_mm_mpsadbw_epu8(a, a, 0b101);
1642+
let e = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
1643+
assert_eq!(r, e);
1644+
1645+
let r = sse41::_mm_mpsadbw_epu8(a, a, 0b111);
1646+
let e = i16x8::new(32, 28, 24, 20, 16, 12, 8, 4);
1647+
assert_eq!(r, e);
1648+
}
15841649
}

0 commit comments

Comments
 (0)