diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index b1b60f12ee..63fa745c5a 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -2950,6 +2950,118 @@ pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin
     vmlal_u32(a, b, c)
 }
 
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+pub unsafe fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vmlal_high_s16(a, b, vdupq_n_s16(c))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2))]
+pub unsafe fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vmlal_high_s32(a, b, vdupq_n_s32(c))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+pub unsafe fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
+    vmlal_high_u16(a, b, vdupq_n_u16(c))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2))]
+pub unsafe fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
+    vmlal_high_u32(a, b, vdupq_n_u32(c))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(LANE);
+    vmlal_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(LANE);
+    vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(LANE);
+    vmlal_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_imm3!(LANE);
+    vmlal_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_imm1!(LANE);
+    vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_imm2!(LANE);
+    vmlal_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
 /// Floating-point multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
@@ -3026,6 +3138,118 @@ pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uin
     vmlsl_u32(a, b, c)
 }
 
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+pub unsafe fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
+    vmlsl_high_s16(a, b, vdupq_n_s16(c))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2))]
+pub unsafe fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
+    vmlsl_high_s32(a, b, vdupq_n_s32(c))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+pub unsafe fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
+    vmlsl_high_u16(a, b, vdupq_n_u16(c))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2))]
+pub unsafe fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
+    vmlsl_high_u32(a, b, vdupq_n_u32(c))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(LANE);
+    vmlsl_high_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(LANE);
+    vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(LANE);
+    vmlsl_high_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_imm3!(LANE);
+    vmlsl_high_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_imm1!(LANE);
+    vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Multiply-subtract long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_high_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_imm2!(LANE);
+    vmlsl_high_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
 /// Extract narrow
 #[inline]
 #[target_feature(enable = "neon")]
@@ -9750,6 +9974,126 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_n_s16() {
+        let a: i32x4 = i32x4::new(8, 7, 6, 5);
+        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: i16 = 2;
+        let e: i32x4 = i32x4::new(8, 9, 10, 11);
+        let r: i32x4 = transmute(vmlal_high_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_n_s32() {
+        let a: i64x2 = i64x2::new(8, 7);
+        let b: i32x4 = i32x4::new(3, 3, 0, 1);
+        let c: i32 = 2;
+        let e: i64x2 = i64x2::new(8, 9);
+        let r: i64x2 = transmute(vmlal_high_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_n_u16() {
+        let a: u32x4 = u32x4::new(8, 7, 6, 5);
+        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: u16 = 2;
+        let e: u32x4 = u32x4::new(8, 9, 10, 11);
+        let r: u32x4 = transmute(vmlal_high_n_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_n_u32() {
+        let a: u64x2 = u64x2::new(8, 7);
+        let b: u32x4 = u32x4::new(3, 3, 0, 1);
+        let c: u32 = 2;
+        let e: u64x2 = u64x2::new(8, 9);
+        let r: u64x2 = transmute(vmlal_high_n_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_lane_s16() {
+        let a: i32x4 = i32x4::new(8, 7, 6, 5);
+        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: i16x4 = i16x4::new(0, 2, 0, 0);
+        let e: i32x4 = i32x4::new(8, 9, 10, 11);
+        let r: i32x4 = transmute(vmlal_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_laneq_s16() {
+        let a: i32x4 = i32x4::new(8, 7, 6, 5);
+        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: i32x4 = i32x4::new(8, 9, 10, 11);
+        let r: i32x4 = transmute(vmlal_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_lane_s32() {
+        let a: i64x2 = i64x2::new(8, 7);
+        let b: i32x4 = i32x4::new(3, 3, 0, 1);
+        let c: i32x2 = i32x2::new(0, 2);
+        let e: i64x2 = i64x2::new(8, 9);
+        let r: i64x2 = transmute(vmlal_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_laneq_s32() {
+        let a: i64x2 = i64x2::new(8, 7);
+        let b: i32x4 = i32x4::new(3, 3, 0, 1);
+        let c: i32x4 = i32x4::new(0, 2, 0, 0);
+        let e: i64x2 = i64x2::new(8, 9);
+        let r: i64x2 = transmute(vmlal_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_lane_u16() {
+        let a: u32x4 = u32x4::new(8, 7, 6, 5);
+        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: u16x4 = u16x4::new(0, 2, 0, 0);
+        let e: u32x4 = u32x4::new(8, 9, 10, 11);
+        let r: u32x4 = transmute(vmlal_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_laneq_u16() {
+        let a: u32x4 = u32x4::new(8, 7, 6, 5);
+        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: u32x4 = u32x4::new(8, 9, 10, 11);
+        let r: u32x4 = transmute(vmlal_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_lane_u32() {
+        let a: u64x2 = u64x2::new(8, 7);
+        let b: u32x4 = u32x4::new(3, 3, 0, 1);
+        let c: u32x2 = u32x2::new(0, 2);
+        let e: u64x2 = u64x2::new(8, 9);
+        let r: u64x2 = transmute(vmlal_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_high_laneq_u32() {
+        let a: u64x2 = u64x2::new(8, 7);
+        let b: u32x4 = u32x4::new(3, 3, 0, 1);
+        let c: u32x4 = u32x4::new(0, 2, 0, 0);
+        let e: u64x2 = u64x2::new(8, 9);
+        let r: u64x2 = transmute(vmlal_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vmls_f64() {
         let a: f64 = 6.;
@@ -9830,6 +10174,126 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_n_s16() {
+        let a: i32x4 = i32x4::new(14, 15, 16, 17);
+        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: i16 = 2;
+        let e: i32x4 = i32x4::new(14, 13, 12, 11);
+        let r: i32x4 = transmute(vmlsl_high_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_n_s32() {
+        let a: i64x2 = i64x2::new(14, 15);
+        let b: i32x4 = i32x4::new(3, 3, 0, 1);
+        let c: i32 = 2;
+        let e: i64x2 = i64x2::new(14, 13);
+        let r: i64x2 = transmute(vmlsl_high_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_n_u16() {
+        let a: u32x4 = u32x4::new(14, 15, 16, 17);
+        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: u16 = 2;
+        let e: u32x4 = u32x4::new(14, 13, 12, 11);
+        let r: u32x4 = transmute(vmlsl_high_n_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_n_u32() {
+        let a: u64x2 = u64x2::new(14, 15);
+        let b: u32x4 = u32x4::new(3, 3, 0, 1);
+        let c: u32 = 2;
+        let e: u64x2 = u64x2::new(14, 13);
+        let r: u64x2 = transmute(vmlsl_high_n_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_lane_s16() {
+        let a: i32x4 = i32x4::new(14, 15, 16, 17);
+        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: i16x4 = i16x4::new(0, 2, 0, 0);
+        let e: i32x4 = i32x4::new(14, 13, 12, 11);
+        let r: i32x4 = transmute(vmlsl_high_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_laneq_s16() {
+        let a: i32x4 = i32x4::new(14, 15, 16, 17);
+        let b: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: i32x4 = i32x4::new(14, 13, 12, 11);
+        let r: i32x4 = transmute(vmlsl_high_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_lane_s32() {
+        let a: i64x2 = i64x2::new(14, 15);
+        let b: i32x4 = i32x4::new(3, 3, 0, 1);
+        let c: i32x2 = i32x2::new(0, 2);
+        let e: i64x2 = i64x2::new(14, 13);
+        let r: i64x2 = transmute(vmlsl_high_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_laneq_s32() {
+        let a: i64x2 = i64x2::new(14, 15);
+        let b: i32x4 = i32x4::new(3, 3, 0, 1);
+        let c: i32x4 = i32x4::new(0, 2, 0, 0);
+        let e: i64x2 = i64x2::new(14, 13);
+        let r: i64x2 = transmute(vmlsl_high_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_lane_u16() {
+        let a: u32x4 = u32x4::new(14, 15, 16, 17);
+        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: u16x4 = u16x4::new(0, 2, 0, 0);
+        let e: u32x4 = u32x4::new(14, 13, 12, 11);
+        let r: u32x4 = transmute(vmlsl_high_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_laneq_u16() {
+        let a: u32x4 = u32x4::new(14, 15, 16, 17);
+        let b: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
+        let c: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: u32x4 = u32x4::new(14, 13, 12, 11);
+        let r: u32x4 = transmute(vmlsl_high_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_lane_u32() {
+        let a: u64x2 = u64x2::new(14, 15);
+        let b: u32x4 = u32x4::new(3, 3, 0, 1);
+        let c: u32x2 = u32x2::new(0, 2);
+        let e: u64x2 = u64x2::new(14, 13);
+        let r: u64x2 = transmute(vmlsl_high_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_high_laneq_u32() {
+        let a: u64x2 = u64x2::new(14, 15);
+        let b: u32x4 = u32x4::new(3, 3, 0, 1);
+        let c: u32x4 = u32x4::new(0, 2, 0, 0);
+        let e: u64x2 = u64x2::new(14, 13);
+        let r: u64x2 = transmute(vmlsl_high_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vmovn_high_s16() {
         let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 4, 5);
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index 0e40deaac7..7f7c8ffddc 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -3122,5390 +3122,5432 @@ pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
     simd_add(a, simd_mul(b, c))
 }
 
-/// Signed multiply-add long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
-pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    simd_add(a, vmull_s8(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmla_s16(a, b, vdup_n_s16(c))
 }
 
-/// Signed multiply-add long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
-pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    simd_add(a, vmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlaq_s16(a, b, vdupq_n_s16(c))
 }
 
-/// Signed multiply-add long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
-pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    simd_add(a, vmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmla_s32(a, b, vdup_n_s32(c))
 }
 
-/// Unsigned multiply-add long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
-pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    simd_add(a, vmull_u8(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlaq_s32(a, b, vdupq_n_s32(c))
 }
 
-/// Unsigned multiply-add long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
-pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    simd_add(a, vmull_u16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmla_u16(a, b, vdup_n_u16(c))
 }
 
-/// Unsigned multiply-add long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
-pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    simd_add(a, vmull_u32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlaq_u16(a, b, vdupq_n_u16(c))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmla_u32(a, b, vdup_n_u32(c))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))]
+pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlaq_u32(a, b, vdupq_n_u32(c))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmla_f32(a, b, vdup_n_f32(c))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlaq_f32(a, b, vdupq_n_f32(c))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vmla_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    vmlaq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vmlaq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    vmla_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vmla_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    vmlaq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
-pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlaq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Floating-point multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
-pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(LANE);
+    vmla_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Floating-point multiply-subtract from accumulator
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
-pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    simd_sub(a, simd_mul(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_imm3!(LANE);
+    vmla_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Signed multiply-subtract long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
-pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    simd_sub(a, vmull_s8(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
+    static_assert_imm2!(LANE);
+    vmlaq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Signed multiply-subtract long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
-pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    simd_sub(a, vmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_imm3!(LANE);
+    vmlaq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Signed multiply-subtract long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
-pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    simd_sub(a, vmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(LANE);
+    vmla_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Signed multiply-subtract long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
-pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    simd_sub(a, vmull_u8(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_imm2!(LANE);
+    vmla_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Signed multiply-subtract long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
-pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    simd_sub(a, vmull_u16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_imm1!(LANE);
+    vmlaq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Signed multiply-subtract long
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
-pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    simd_sub(a, vmull_u32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlaq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Negate
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
-pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    vmla_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Negate
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
-pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmla_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+    static_assert_imm2!(LANE);
+    vmla_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Negate
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
-pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    vmlaq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Negate
+/// Vector multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
-pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    static_assert_imm2!(LANE);
+    vmlaq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Negate
+/// Signed multiply-add long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
-pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    simd_add(a, vmull_s8(b, c))
 }
 
-/// Negate
+/// Signed multiply-add long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
-pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    simd_add(a, vmull_s16(b, c))
 }
 
-/// Negate
+/// Signed multiply-add long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))]
-pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    simd_add(a, vmull_s32(b, c))
 }
 
-/// Negate
+/// Unsigned multiply-add long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))]
-pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t {
-    simd_neg(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    simd_add(a, vmull_u8(b, c))
 }
 
-/// Signed saturating negate
+/// Unsigned multiply-add long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
-pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i8")]
-        fn vqneg_s8_(a: int8x8_t) -> int8x8_t;
-    }
-vqneg_s8_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    simd_add(a, vmull_u16(b, c))
 }
 
-/// Signed saturating negate
+/// Unsigned multiply-add long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
-pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v16i8")]
-        fn vqnegq_s8_(a: int8x16_t) -> int8x16_t;
-    }
-vqnegq_s8_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    simd_add(a, vmull_u32(b, c))
 }
 
-/// Signed saturating negate
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
-pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i16")]
-        fn vqneg_s16_(a: int16x4_t) -> int16x4_t;
-    }
-vqneg_s16_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlal_s16(a, b, vdup_n_s16(c))
 }
 
-/// Signed saturating negate
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
-pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i16")]
-        fn vqnegq_s16_(a: int16x8_t) -> int16x8_t;
-    }
-vqnegq_s16_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlal_s32(a, b, vdup_n_s32(c))
 }
 
-/// Signed saturating negate
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
-pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i32")]
-        fn vqneg_s32_(a: int32x2_t) -> int32x2_t;
-    }
-vqneg_s32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlal_u16(a, b, vdup_n_u16(c))
 }
 
-/// Signed saturating negate
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))]
-pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i32")]
-        fn vqnegq_s32_(a: int32x4_t) -> int32x4_t;
-    }
-vqnegq_s32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlal_u32(a, b, vdup_n_u32(c))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")]
-        fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-vqsub_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlal_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")]
-        fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-vqsubq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(LANE);
+    vmlal_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i16")]
-        fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-vqsub_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(LANE);
+    vmlal_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i16")]
-        fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-vqsubq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(LANE);
+    vmlal_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i32")]
-        fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-vqsub_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlal_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i32")]
-        fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-vqsubq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_imm3!(LANE);
+    vmlal_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")]
-        fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
-    }
-vqsub_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_imm1!(LANE);
+    vmlal_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Vector widening multiply accumulate with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
-pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")]
-        fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
-    }
-vqsubq_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_imm2!(LANE);
+    vmlal_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
 
-/// Saturating subtract
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
-pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i8")]
-        fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vqsub_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    simd_sub(a, simd_mul(b, c))
 }
 
-/// Saturating subtract
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
-pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v16i8")]
-        fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vqsubq_s8_(a, b)
assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_sub(a, simd_mul(b, c)) } -/// Saturating subtract +/// Multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] -pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i16")] - fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; - } -vqsub_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_sub(a, simd_mul(b, c)) } -/// Saturating subtract +/// Multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] -pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i16")] - fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; - } -vqsubq_s16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_sub(a, simd_mul(b, c)) } -/// Saturating subtract +/// Multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] -pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i32")] - fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; - } -vqsub_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_sub(a, simd_mul(b, c)) } -/// Saturating subtract +/// Multiply-subtract from accumulator #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] -pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")] - fn vqsubq_s32_(a: 
-/// Saturating subtract
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
-pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")]
-        fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vqsubq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Saturating subtract
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
-pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")]
-        fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-vqsub_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Saturating subtract
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
-pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")]
-        fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-vqsubq_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
-pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")]
-        fn vhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-vhadd_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
-pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")]
-        fn vhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-vhaddq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
-pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")]
-        fn vhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-vhadd_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
-pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")]
-        fn vhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-vhaddq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Floating-point multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
-pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")]
-        fn vhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-vhadd_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Floating-point multiply-subtract from accumulator
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
-pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")]
-        fn vhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-vhaddq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    simd_sub(a, simd_mul(b, c))
 }

-/// Halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
-pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i8")]
-        fn vhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vhadd_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmls_s16(a, b, vdup_n_s16(c))
 }

-/// Halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
-pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v16i8")]
-        fn vhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vhaddq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlsq_s16(a, b, vdupq_n_s16(c))
 }

-/// Halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
-pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i16")]
-        fn vhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vhadd_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmls_s32(a, b, vdup_n_s32(c))
 }

-/// Halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
-pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i16")]
-        fn vhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vhaddq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlsq_s32(a, b, vdupq_n_s32(c))
 }

-/// Halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
-pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v2i32")]
-        fn vhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vhadd_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmls_u16(a, b, vdup_n_u16(c))
 }

-/// Halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
-pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i32")]
-        fn vhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vhaddq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlsq_u16(a, b, vdupq_n_u16(c))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
-pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i8")]
-        fn vrhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-vrhadd_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmls_u32(a, b, vdup_n_u32(c))
 }
-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
-pub unsafe fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v16i8")]
-        fn vrhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-vrhaddq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlsq_u32(a, b, vdupq_n_u32(c))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
-pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i16")]
-        fn vrhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-vrhadd_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmls_f32(a, b, vdup_n_f32(c))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
-pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i16")]
-        fn vrhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-vrhaddq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlsq_f32(a, b, vdupq_n_f32(c))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
-pub unsafe fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v2i32")]
-        fn vrhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-vrhadd_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    vmls_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))]
-pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i32")]
-        fn vrhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-vrhaddq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vmls_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
-pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i8")]
-        fn vrhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vrhadd_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    vmlsq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
-pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v16i8")]
-        fn vrhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vrhaddq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vmlsq_s16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
-pub unsafe fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i16")]
-        fn vrhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vrhadd_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    vmls_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
-pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i16")]
-        fn vrhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vrhaddq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vmls_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }

-/// Rounding halving add
+/// Vector multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))]
-pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v2i32")]
-        fn vrhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vrhadd_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    vmlsq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }
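The `_lane_`/`_laneq_` forms differ from `_n_` only in where the scalar comes from: lane `LANE` of a vector is broadcast instead of an immediate scalar. In other words (sketch using the public API; `vget_lane_s32` is the existing lane-extraction intrinsic):

    use core::arch::aarch64::*;

    unsafe fn lane_form_equivalence(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> (int32x2_t, int32x2_t) {
        // Broadcasting lane 1 of c by hand gives the same result as the lane form.
        (
            vmls_lane_s32::<1>(a, b, c),
            vmls_s32(a, b, vdup_n_s32(vget_lane_s32::<1>(c))),
        )
    }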
target_arch = "aarch64"), assert_instr(srhadd))] -pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i32")] - fn vrhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; - } -vrhaddq_s32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlsq_s32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Floating-point round to integral, to nearest with ties to even +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] -pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")] - fn vrndn_f32_(a: float32x2_t) -> float32x2_t; - } -vrndn_f32_(a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(LANE); + vmls_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Floating-point round to integral, to nearest with ties to even +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] -pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")] - fn vrndnq_f32_(a: float32x4_t) -> float32x4_t; - } -vrndnq_f32_(a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t { + static_assert_imm3!(LANE); + vmls_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - 
#[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i8")] - fn vqadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - } -vqadd_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t { + static_assert_imm2!(LANE); + vmlsq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v16i8")] - fn vqaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vqaddq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE); + vmlsq_u16(a, b, simd_shuffle8(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i16")] - fn vqadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - } -vqadd_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(LANE); + vmls_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i16")] - fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; - } 
-vqaddq_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t { + static_assert_imm2!(LANE); + vmls_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i32")] - fn vqadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - } -vqadd_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t { + static_assert_imm1!(LANE); + vmlsq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i32")] - fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; - } -vqaddq_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlsq_u32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")] - fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; - } -vqadd_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_lane_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + static_assert_imm1!(LANE); + vmls_f32(a, b, 
simd_shuffle2(c, c, [LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] -pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")] - fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; - } -vqaddq_u64_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmls_laneq_f32(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t { + static_assert_imm2!(LANE); + vmls_f32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] -pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i8")] - fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; - } -vqadd_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_lane_f32(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t { + static_assert_imm1!(LANE); + vmlsq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Vector multiply subtract with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] -pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v16i8")] - fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - } -vqaddq_s8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vmlsq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE); + vmlsq_f32(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) } -/// Saturating add +/// Signed multiply-subtract long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] -pub unsafe 
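Note that the floating-point variants assert `fmul` rather than a fused multiply-subtract on AArch64: the body is literally `simd_sub(a, simd_mul(b, c))`, so it lowers to a separate multiply and subtract with no fusion. A scalar model (illustrative sketch, not part of the patch):

    fn vmls_f32_model(a: [f32; 2], b: [f32; 2], c: [f32; 2]) -> [f32; 2] {
        // Two roundings per lane: one for the product, one for the subtraction.
        [a[0] - b[0] * c[0], a[1] - b[1] * c[1]]
    }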
-/// Saturating add
+/// Signed multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
-pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i16")]
-        fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vqadd_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
+pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    simd_sub(a, vmull_s8(b, c))
 }

-/// Saturating add
+/// Signed multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
-pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i16")]
-        fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vqaddq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
+pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    simd_sub(a, vmull_s16(b, c))
 }

-/// Saturating add
+/// Signed multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
-pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i32")]
-        fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vqadd_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
+pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    simd_sub(a, vmull_s32(b, c))
 }

-/// Saturating add
+/// Unsigned multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
-pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i32")]
-        fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vqaddq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
+pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    simd_sub(a, vmull_u8(b, c))
 }
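`vmlsl_*` is the widening ("long") counterpart: the operands are multiplied at double width before subtracting from the accumulator, so an i16-by-i16 product is exact in i32. A scalar model of `vmlsl_s16` (sketch for exposition):

    fn vmlsl_s16_model(a: [i32; 4], b: [i16; 4], c: [i16; 4]) -> [i32; 4] {
        let mut out = [0i32; 4];
        for i in 0..4 {
            // The widened product cannot overflow i32; the subtraction wraps.
            out[i] = a[i].wrapping_sub(i32::from(b[i]) * i32::from(c[i]));
        }
        out
    }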
-/// Saturating add
+/// Unsigned multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
-pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")]
-        fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-vqadd_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
+pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    simd_sub(a, vmull_u16(b, c))
 }

-/// Saturating add
+/// Unsigned multiply-subtract long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))]
-pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")]
-        fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-vqaddq_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
+pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    simd_sub(a, vmull_u32(b, c))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
+pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlsl_s16(a, b, vdup_n_s16(c))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
+pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlsl_s32(a, b, vdup_n_s32(c))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
+pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlsl_u16(a, b, vdup_n_u16(c))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
+pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlsl_u32(a, b, vdup_n_u32(c))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlsl_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(LANE);
+    vmlsl_s16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(LANE);
+    vmlsl_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(LANE);
+    vmlsl_s32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlsl_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_imm3!(LANE);
+    vmlsl_u16(a, b, simd_shuffle4(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_imm1!(LANE);
+    vmlsl_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }

-/// Multiply
+/// Vector widening multiply subtract with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
-pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_imm2!(LANE);
+    vmlsl_u32(a, b, simd_shuffle2(c, c, [LANE as u32, LANE as u32]))
 }
-/// Multiply
+/// Negate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
-pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
+pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t {
+    simd_neg(a)
 }

-/// Multiply
+/// Negate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
-pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_mul(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
+pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t {
+    simd_neg(a)
 }

-/// Signed multiply long
+/// Negate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
-pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")]
-        fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t;
-    }
-vmull_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
+pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t {
+    simd_neg(a)
 }

-/// Signed multiply long
+/// Negate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
-pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")]
-        fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-vmull_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))]
+pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t {
+    simd_neg(a)
 }
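`vneg_*` is a plain lane-wise negation; on the signed minimum it wraps (two's complement), which is what distinguishes it from the saturating `vqneg_*` functions introduced just below, where e.g. `i8::MIN` maps to `i8::MAX`. Scalar model (sketch):

    fn vneg_s8_model(a: [i8; 8]) -> [i8; 8] {
        // i8::MIN negates to itself here; vqneg_s8 would saturate it to i8::MAX.
        a.map(|x| x.wrapping_neg())
    }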
vneg_s32(a: int32x2_t) -> int32x2_t { + simd_neg(a) } -/// Unsigned multiply long +/// Negate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] -pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")] - fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; - } -vmull_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t { + simd_neg(a) } -/// Unsigned multiply long +/// Negate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] -pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")] - fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; - } -vmull_u16_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t { + simd_neg(a) } -/// Unsigned multiply long +/// Negate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] -pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")] - fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; - } -vmull_u32_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t { + simd_neg(a) } -/// Polynomial multiply long +/// Signed saturating negate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))] -pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")] - fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + #[cfg_attr(target_arch = 
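
// --- Illustrative note (not part of the generated diff) ---
// A minimal usage sketch of the new vneg/vnegq intrinsics, assuming an
// AArch64 target with NEON available. Plain NEG wraps on overflow: negating
// i8::MIN yields i8::MIN again, which is why the saturating vqneg variants
// below also exist.
#[cfg(target_arch = "aarch64")]
unsafe fn vneg_sketch() {
    use core::arch::aarch64::*;
    let a = vdup_n_s8(3);
    assert_eq!(vget_lane_s8::<0>(vneg_s8(a)), -3); // every lane negated
    let min = vdup_n_s8(i8::MIN);
    assert_eq!(vget_lane_s8::<0>(vneg_s8(min)), i8::MIN); // wraps, no saturation
}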
"aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i8")] + fn vqneg_s8_(a: int8x8_t) -> int8x8_t; } -vmull_p8_(a, b) +vqneg_s8_(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) +/// Signed saturating negate #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] -pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")] - fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v16i8")] + fn vqnegq_s8_(a: int8x16_t) -> int8x16_t; } -vfma_f32_(a, b, c) +vqnegq_s8_(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) +/// Signed saturating negate #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] -pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")] - fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i16")] + fn vqneg_s16_(a: int16x4_t) -> int16x4_t; } -vfmaq_f32_(a, b, c) +vqneg_s16_(a) } -/// Floating-point fused Multiply-Add to accumulator(vector) +/// Signed saturating negate #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] -pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { - let d: float32x2_t = transmute(f32x2::new(c, c)); - vfma_f32(b, d, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i16")] + fn vqnegq_s16_(a: int16x8_t) -> int16x8_t; + } +vqnegq_s16_(a) } -/// 
Floating-point fused Multiply-Add to accumulator(vector) +/// Signed saturating negate #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] -pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { - let d: float32x4_t = transmute(f32x4::new(c, c, c, c)); - vfmaq_f32(b, d, a) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i32")] + fn vqneg_s32_(a: int32x2_t) -> int32x2_t; + } +vqneg_s32_(a) } -/// Subtract +/// Signed saturating negate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i32")] + fn vqnegq_s32_(a: int32x4_t) -> int32x4_t; + } +vqnegq_s32_(a) } -/// Subtract +/// Saturating subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")] + fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vqsub_u8_(a, b) } -/// Subtract +/// Saturating subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] -pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_sub(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")] + fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> 
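
// --- Illustrative note (not part of the generated diff) ---
// Sketch of the saturating-negate semantics, assuming AArch64 with NEON:
// unlike plain vneg, vqneg clamps i8::MIN to i8::MAX instead of wrapping.
#[cfg(target_arch = "aarch64")]
unsafe fn vqneg_sketch() {
    use core::arch::aarch64::*;
    let min = vdup_n_s8(i8::MIN);
    assert_eq!(vget_lane_s8::<0>(vqneg_s8(min)), i8::MAX); // saturates, no wrap
}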

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")]
+        fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+vqsub_u8_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")]
+        fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    }
+vqsubq_u8_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i16")]
+        fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+vqsub_u16_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i16")]
+        fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+vqsubq_u16_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i32")]
+        fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+vqsub_u32_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i32")]
+        fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+vqsubq_u32_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")]
+        fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
+    }
+vqsub_u64_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))]
+pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")]
+        fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
+    }
+vqsubq_u64_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    simd_sub(a, b)
-}
-
-/// Subtract
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i8")]
+        fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vqsub_s8_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v16i8")]
+        fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vqsubq_s8_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i16")]
+        fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vqsub_s16_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i16")]
+        fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vqsubq_s16_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i32")]
+        fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vqsub_s32_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
-pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")]
+        fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vqsubq_s32_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))]
-pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")]
+        fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vqsub_s64_(a, b)
 }

-/// Subtract
+/// Saturating subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))]
-pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    simd_sub(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))]
+pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")]
+        fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vqsubq_s64_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
-    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
-    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
+pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")]
+        fn vhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+vhadd_u8_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
-    let c: i32x4 = i32x4::new(16, 16, 16, 16);
-    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
+pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")]
+        fn vhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+    }
+vhaddq_u8_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
-    let c: i64x2 = i64x2::new(32, 32);
-    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
+pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")]
+        fn vhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+vhadd_u16_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
-    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
+pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")]
+        fn vhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+    }
+vhaddq_u16_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    let c: u32x4 = u32x4::new(16, 16, 16, 16);
-    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
+pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")]
+        fn vhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+vhadd_u32_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
-pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    let c: u64x2 = u64x2::new(32, 32);
-    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))]
+pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")]
+        fn vhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+    }
+vhaddq_u32_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let d: int8x8_t = vsubhn_s16(b, c);
-    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
+pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i8")]
+        fn vhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vhadd_s8_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let d: int16x4_t = vsubhn_s32(b, c);
-    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
-}
-
-/// Subtract returning high narrow
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let d: int32x2_t = vsubhn_s64(b, c);
-    simd_shuffle4(a, d, [0, 1, 2, 3])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
+pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v16i8")]
+        fn vhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vhaddq_s8_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let d: uint8x8_t = vsubhn_u16(b, c);
-    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
+pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i16")]
+        fn vhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vhadd_s16_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let d: uint16x4_t = vsubhn_u32(b, c);
-    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
+pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i16")]
+        fn vhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vhaddq_s16_(a, b)
 }

-/// Subtract returning high narrow
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
-pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let d: uint32x2_t = vsubhn_u64(b, c);
-    simd_shuffle4(a, d, [0, 1, 2, 3])
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
+pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v2i32")]
+        fn vhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vhadd_s32_(a, b)
 }

-/// Signed halving subtract
+/// Halving add
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))]
-pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))]
+pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i8")]
-        fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i32")]
+        fn vhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-vhsub_u8_(a, b)
+vhaddq_s32_(a, b)
 }
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] -pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")] - fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i8")] + fn vrhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } -vhsubq_u8_(a, b) +vrhadd_u8_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] -pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")] - fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v16i8")] + fn vrhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; } -vhsub_u16_(a, b) +vrhaddq_u8_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] -pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")] - fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i16")] + fn vrhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; } -vhsubq_u16_(a, b) +vrhadd_u16_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] -#[cfg_attr(all(test, target_arch 
= "aarch64"), assert_instr(uhsub))] -pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")] - fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i16")] + fn vrhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; } -vhsub_u32_(a, b) +vrhaddq_u16_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] -pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")] - fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v2i32")] + fn vrhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; } -vhsubq_u32_(a, b) +vrhadd_u32_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")] - fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i32")] + fn vrhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } -vhsub_s8_(a, b) +vrhaddq_u32_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")] - fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i8")] + fn vrhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vhsubq_s8_(a, b) +vrhadd_s8_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")] - fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v16i8")] + fn vrhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vhsub_s16_(a, b) +vrhaddq_s8_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")] - fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i16")] + fn vrhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vhsubq_s16_(a, b) +vrhadd_s16_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] - 
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")] - fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i16")] + fn vrhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vhsub_s32_(a, b) +vrhaddq_s16_(a, b) } -/// Signed halving subtract +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] -pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")] - fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v2i32")] + fn vrhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vhsubq_s32_(a, b) +vrhadd_s32_(a, b) } -/// Signed Subtract Wide +/// Rounding halving add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] -pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i32")] + fn vrhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vrhaddq_s32_(a, b) } -/// Signed Subtract Wide +/// Floating-point round to integral, to nearest with ties to even #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] -pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { - simd_sub(a, simd_cast(b)) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] +pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")] + fn vrndn_f32_(a: float32x2_t) -> float32x2_t; + } +vrndn_f32_(a) } -/// Signed Subtract Wide +/// Floating-point round to integral, to nearest with ties to even #[inline] #[target_feature(enable = "neon")] 

-/// Signed Subtract Wide
+/// Floating-point round to integral, to nearest with ties to even
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))]
-pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t {
-    simd_sub(a, simd_cast(b))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
+pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
+        fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
+    }
+vrndn_f32_(a)
 }

-/// Signed Subtract Wide
+/// Floating-point round to integral, to nearest with ties to even
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))]
-pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t {
-    simd_sub(a, simd_cast(b))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
+pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
+        fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+vrndnq_f32_(a)
 }
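
// --- Illustrative note (not part of the generated diff) ---
// Sketch of the ties-to-even rounding mode, assuming AArch64 with NEON:
// FRINTN rounds halfway cases to the nearest even integer, unlike
// round-half-away-from-zero.
#[cfg(target_arch = "aarch64")]
unsafe fn vrndn_sketch() {
    use core::arch::aarch64::*;
    assert_eq!(vget_lane_f32::<0>(vrndn_f32(vdup_n_f32(2.5))), 2.0); // tie -> even
    assert_eq!(vget_lane_f32::<0>(vrndn_f32(vdup_n_f32(3.5))), 4.0); // tie -> even
}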
#[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] -pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - let c: int16x8_t = simd_cast(a); - let d: int16x8_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i16")] + fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vqaddq_u16_(a, b) } -/// Signed Subtract Long +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] -pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - let c: int32x4_t = simd_cast(a); - let d: int32x4_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i32")] + fn vqadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vqadd_u32_(a, b) } -/// Signed Subtract Long +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] -pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - let c: int64x2_t = simd_cast(a); - let d: int64x2_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i32")] + fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vqaddq_u32_(a, b) } -/// Unsigned Subtract Long +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] -pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - let c: uint16x8_t = simd_cast(a); - let d: uint16x8_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")] + 
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")] + fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; + } +vqadd_u64_(a, b) } -/// Unsigned Subtract Long +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] -pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - let c: uint32x4_t = simd_cast(a); - let d: uint32x4_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")] + fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } +vqaddq_u64_(a, b) } -/// Unsigned Subtract Long +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] -pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - let c: uint64x2_t = simd_cast(a); - let d: uint64x2_t = simd_cast(b); - simd_sub(c, d) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i8")] + fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqadd_s8_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i8")] - fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v16i8")] + fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vmax_s8_(a, b) +vqaddq_s8_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v16i8")] - fn vmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i16")] + fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vmaxq_s8_(a, b) +vqadd_s16_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i16")] - fn vmax_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i16")] + fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vmax_s16_(a, b) +vqaddq_s16_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i16")] - fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i32")] + fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vmaxq_s16_(a, b) +vqadd_s32_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v2i32")] - fn vmax_s32_(a: int32x2_t, 
b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i32")] + fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vmax_s32_(a, b) +vqaddq_s32_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] -pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i32")] - fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")] + fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; } -vmaxq_s32_(a, b) +vqadd_s64_(a, b) } -/// Maximum (vector) +/// Saturating add #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i8")] - fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")] + fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; } -vmax_u8_(a, b) +vqaddq_s64_(a, b) } -/// Maximum (vector) +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")] - fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; - } -vmaxq_u8_(a, b) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_mul(a, b) } -/// Maximum (vector) +/// Multiply #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] -pub 
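[Editor's note: illustrative sketch only, not part of the patch.] The vqadd* wrappers above bind to the LLVM saturating-add intrinsics, so each lane clamps at the type's bounds instead of wrapping. A minimal demo, assuming an AArch64 target and using only intrinsics that already exist in core::arch::aarch64:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn qadd_sketch() {
    use core::arch::aarch64::*;
    let a = vdupq_n_u16(u16::MAX - 1);
    let b = vdupq_n_u16(10);
    // 65534 + 10 saturates to 65535 in every lane rather than wrapping to 8.
    let sat = vqaddq_u16(a, b);
    let mut out = [0u16; 8];
    vst1q_u16(out.as_mut_ptr(), sat);
    assert!(out.iter().all(|&x| x == u16::MAX));
}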
-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))]
-pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")]
-        fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
-    }
-vmaxq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_mul(a, b)
 }

-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))]
-pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i16")]
-        fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-vmax_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_mul(a, b)
 }

-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))]
-pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i16")]
-        fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
-    }
-vmaxq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_mul(a, b)
 }

-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))]
-pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v2i32")]
-        fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-vmax_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_mul(a, b)
 }

-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))]
-pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i32")]
-        fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
-    }
-vmaxq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_mul(a, b)
 }

-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))]
-pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f32")]
-        fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-vmax_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_mul(a, b)
 }

-/// Maximum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))]
-pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")]
-        fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-vmaxq_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_mul(a, b)
 }

-/// Floating-point Maximun Number (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
-pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")]
-        fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-vmaxnm_f32_(a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_mul(a, b)
 }

-/// Floating-point Maximun Number (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))]
-pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")]
-        fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-vmaxnmq_f32_(a, b)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_mul(a, b)
 }

-/// Minimum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))]
-pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i8")]
-        fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vmin_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_mul(a, b)
 }

-/// Minimum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))]
-pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v16i8")]
-        fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vminq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_mul(a, b)
 }

-/// Minimum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))]
-pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i16")]
-        fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vmin_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
+pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_mul(a, b)
 }

-/// Minimum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))]
-pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i16")]
-        fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vminq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_mul(a, b)
 }

-/// Minimum (vector)
+/// Multiply
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))]
-pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v2i32")]
-        fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vmin_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_mul(a, b)
 }

-/// Minimum (vector)
+/// Signed multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))]
-pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
+pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i32")]
-        fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")]
+        fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t;
     }
-vminq_s32_(a, b)
+vmull_s8_(a, b)
 }

-/// Minimum (vector)
+/// Signed multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))]
-pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
+pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i8")]
-        fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")]
+        fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
     }
-vmin_u8_(a, b)
+vmull_s16_(a, b)
 }

-/// Minimum (vector)
+/// Signed multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))]
-pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
+pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v16i8")]
-        fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v2i32")]
+        fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
     }
-vminq_u8_(a, b)
+vmull_s32_(a, b)
 }

-/// Minimum (vector)
+/// Unsigned multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))]
-pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
+pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i16")]
-        fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")]
+        fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t;
     }
-vmin_u16_(a, b)
+vmull_u8_(a, b)
 }

-/// Minimum (vector)
+/// Unsigned multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))]
-pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
+pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i16")]
-        fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")]
+        fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t;
     }
-vminq_u16_(a, b)
+vmull_u16_(a, b)
 }

-/// Minimum (vector)
+/// Unsigned multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))]
-pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
+pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v2i32")]
-        fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")]
+        fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t;
     }
-vmin_u32_(a, b)
+vmull_u32_(a, b)
 }

-/// Minimum (vector)
+/// Polynomial multiply long
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))]
-pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))]
+pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i32")]
-        fn vminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")]
+        fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
     }
-vminq_u32_(a, b)
+vmull_p8_(a, b)
 }
-/// Minimum (vector)
+/// Floating-point fused Multiply-Add to accumulator (vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))]
-pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
+pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f32")]
-        fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")]
+        fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t;
     }
-vmin_f32_(a, b)
+vfma_f32_(a, b, c)
 }

-/// Minimum (vector)
+/// Floating-point fused Multiply-Add to accumulator (vector)
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))]
-pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
+pub unsafe fn vfmaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")]
-        fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")]
+        fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t;
     }
-vminq_f32_(a, b)
+vfmaq_f32_(a, b, c)
 }

-/// Floating-point Minimun Number (vector)
+/// Floating-point fused Multiply-Add to accumulator (vector)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
-pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")]
-        fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-vminnm_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
+pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    let d: float32x2_t = transmute(f32x2::new(c, c));
+    vfma_f32(b, d, a)
 }

-/// Floating-point Minimun Number (vector)
+/// Floating-point fused Multiply-Add to accumulator (vector)
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))]
-pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")]
-        fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-vminnmq_f32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))]
+pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    let d: float32x4_t = transmute(f32x4::new(c, c, c, c));
+    vfmaq_f32(b, d, a)
 }
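[Editor's note: illustrative sketch only, not part of the patch.] Because the extern fn above is bound to llvm.fma (which computes x * y + z), vfma_n_f32(a, b, n) works out to a + b * n per lane: the scalar n is broadcast via f32x2::new and passed as the second multiplicand, with a forwarded as the addend. A small check of that identity, assuming an AArch64 target and its existing vfma_n_f32:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn fma_n_sketch() {
    use core::arch::aarch64::*;
    let acc = vdup_n_f32(1.0);
    let b = vdup_n_f32(2.0);
    // 1.0 + 2.0 * 3.0 = 7.0 in both lanes, with a single fused rounding.
    let r = vfma_n_f32(acc, b, 3.0);
    let mut out = [0.0f32; 2];
    vst1_f32(out.as_mut_ptr(), r);
    assert_eq!(out, [7.0, 7.0]);
}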
-/// Signed saturating doubling multiply long
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")]
-        fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-vqdmull_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply long
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")]
-        fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
-    }
-vqdmull_s32_(a, b)
-}
-
-/// Vector saturating doubling long multiply with scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vqdmull_s16(a, vdup_n_s16(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    simd_sub(a, b)
 }

-/// Vector saturating doubling long multiply with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
-pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vqdmull_s32(a, vdup_n_s32(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    simd_sub(a, b)
 }

-/// Vector saturating doubling long multiply by scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_imm2!(N);
-    let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]);
-    vqdmull_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    simd_sub(a, b)
 }

-/// Vector saturating doubling long multiply by scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 1))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_imm1!(N);
-    let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]);
-    vqdmull_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply-add long
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply-add long
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_n_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
-pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_n_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_imm2!(N);
-    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply accumulate with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_imm1!(N);
-    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply-subtract long
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply-subtract long
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_n_s16(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
-pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_n_s32(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 2))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_imm2!(N);
-    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))]
+pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    simd_sub(a, b)
 }

-/// Vector widening saturating doubling multiply subtract with scalar
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 1))]
-#[rustc_legacy_const_generics(3)]
-pub unsafe fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_imm1!(N);
-    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))]
+pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply returning high half
+/// Subtract
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")]
-        fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vqdmulh_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))]
+pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    simd_sub(a, b)
 }

-/// Signed saturating doubling multiply returning high half
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")]
-        fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vqdmulhq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }

-/// Signed saturating doubling multiply returning high half
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")]
-        fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vqdmulh_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    let c: i32x4 = i32x4::new(16, 16, 16, 16);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }

-/// Signed saturating doubling multiply returning high half
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")]
-        fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vqdmulhq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    let c: i64x2 = i64x2::new(32, 32);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }

-/// Vector saturating doubling multiply high with scalar
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmulh_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }

-/// Vector saturating doubling multiply high with scalar
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmulh_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    let c: u32x4 = u32x4::new(16, 16, 16, 16);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }

-/// Vector saturating doubling multiply high with scalar
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhq_nq_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    let b: int16x8_t = vdupq_n_s16(b);
-    vqdmulhq_s16(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))]
+pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    let c: u64x2 = u64x2::new(32, 32);
+    simd_cast(simd_shr(simd_sub(a, b), transmute(c)))
 }

-/// Vector saturating doubling multiply high with scalar
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
-pub unsafe fn vqdmulhq_nq_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    let b: int32x4_t = vdupq_n_s32(b);
-    vqdmulhq_s32(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let d: int8x8_t = vsubhn_s16(b, c);
+    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Signed saturating rounding shift left
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
-pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")]
-        fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vqrshl_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let d: int16x4_t = vsubhn_s32(b, c);
+    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Signed saturating rounding shift left
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
-pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")]
-        fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vqrshlq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let d: int32x2_t = vsubhn_s64(b, c);
+    simd_shuffle4(a, d, [0, 1, 2, 3])
 }

-/// Signed saturating rounding shift left
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
-pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")]
-        fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vqrshl_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
+    let d: uint8x8_t = vsubhn_u16(b, c);
+    simd_shuffle16(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
 }

-/// Signed saturating rounding shift left
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
-pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")]
-        fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vqrshlq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
+    let d: uint16x4_t = vsubhn_u32(b, c);
+    simd_shuffle8(a, d, [0, 1, 2, 3, 4, 5, 6, 7])
 }

-/// Signed saturating rounding shift left
+/// Subtract returning high narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
-pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")]
-        fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vqrshl_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))]
+pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
+    let d: uint32x2_t = vsubhn_u64(b, c);
+    simd_shuffle4(a, d, [0, 1, 2, 3])
 }
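[Editor's note: illustrative sketch only, not part of the patch.] The vsubhn_* bodies above subtract, shift right by half the lane width, and narrow, keeping only the high half of each difference; the vsubhn_high_* variants then concatenate that narrow result onto an existing half-width vector. A demo of the narrowing step, assuming an AArch64 target:

#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn subhn_sketch() {
    use core::arch::aarch64::*;
    let a = vdupq_n_u16(0x1234);
    let b = vdupq_n_u16(0x0034);
    // (0x1234 - 0x0034) >> 8 = 0x12 in every narrowed u8 lane.
    let hi = vsubhn_u16(a, b);
    let mut out = [0u8; 8];
    vst1_u8(out.as_mut_ptr(), hi);
    assert!(out.iter().all(|&x| x == 0x12));
}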
target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")] - fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")] + fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; } -vqrshl_s64_(a, b) +vhsubq_u8_(a, b) } -/// Signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] -pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")] - fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")] + fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; } -vqrshlq_s64_(a, b) +vhsub_u16_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")] - fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")] + fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; } -vqrshl_u8_(a, b) +vhsubq_u16_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsub_u32(a: 
uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")] - fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")] + fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; } -vqrshlq_u8_(a, b) +vhsub_u32_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")] - fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")] + fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } -vqrshl_u16_(a, b) +vhsubq_u32_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")] - fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")] + fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vqrshlq_u16_(a, b) +vhsub_s8_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqrshiftu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")] - fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")] + fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vqrshl_u32_(a, b) +vhsubq_s8_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")] - fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")] + fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqrshlq_u32_(a, b) +vhsub_s16_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")] - fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")] + fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqrshl_u64_(a, b) +vhsubq_s16_(a, b) } -/// Unsigned signed saturating rounding shift left +/// Signed halving subtract #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] -pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")] - fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; - } -vqrshlq_u64_(a, b) -} - -/// Signed saturating rounded shift right narrow -#[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] 
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] - fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")] + fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +vhsub_s32_(a, b) } -/// Signed saturating rounded shift right narrow +/// Signed halving subtract #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")] - fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")] + fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqrshrn_n_s16_(a, N) +vhsubq_s32_(a, b) } -/// Signed saturating rounded shift right narrow +/// Signed Subtract Wide #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")] - fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; - } -vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + simd_sub(a, simd_cast(b)) } -/// Signed saturating rounded shift right narrow +/// Signed Subtract Wide #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s32(a: int32x4_t) -> int16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")] - fn 
vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; - } -vqrshrn_n_s32_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + simd_sub(a, simd_cast(b)) } -/// Signed saturating rounded shift right narrow +/// Signed Subtract Wide #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")] - fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; - } -vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + simd_sub(a, simd_cast(b)) } -/// Signed saturating rounded shift right narrow +/// Unsigned Subtract Wide #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")] - fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; - } -vqrshrn_n_s64_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + simd_sub(a, simd_cast(b)) } -/// Unsigned signed saturating rounded shift right narrow +/// Unsigned Subtract Wide #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] - fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } -vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + simd_sub(a, simd_cast(b)) } -/// Unsigned signed saturating rounded shift right narrow +/// Unsigned Subtract Wide #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn 
vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")] - fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; - } -vqrshrn_n_u16_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +pub unsafe fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + simd_sub(a, simd_cast(b)) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed Subtract Long #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] - fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; - } -vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed Subtract Long #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")] - fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t; - } -vqrshrn_n_u32_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) } -/// Unsigned signed saturating rounded shift right narrow +/// Signed Subtract Long #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] - fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; - } -vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let c: 
int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) } -/// Unsigned signed saturating rounded shift right narrow +/// Unsigned Subtract Long #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")] - fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t; - } -vqrshrn_n_u64_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) } -/// Signed saturating rounded shift right unsigned narrow +/// Unsigned Subtract Long #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")] - fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t; - } -vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) } -/// Signed saturating rounded shift right unsigned narrow +/// Unsigned Subtract Long #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s16(a: int16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")] - fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t; - } -vqrshrun_n_s16_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) } -/// Signed saturating rounded shift right unsigned narrow +/// Maximum (vector) #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")] - fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i8")] + fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +vmax_s8_(a, b) } -/// Signed saturating rounded shift right unsigned narrow +/// Maximum (vector) #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s32(a: int32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")] - fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v16i8")] + fn vmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vqrshrun_n_s32_(a, N) +vmaxq_s8_(a, b) } -/// Signed saturating rounded shift right unsigned narrow +/// Maximum (vector) #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] - fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i16")] + fn vmax_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +vmax_s16_(a, b) } -/// Signed saturating rounded shift right unsigned narrow +/// Maximum (vector) #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(smax))] +pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")] - fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i16")] + fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqrshrun_n_s64_(a, N) +vmaxq_s16_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")] - fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v2i32")] + fn vmax_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; } -vqshl_s8_(a, b) +vmax_s32_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] - fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i32")] + fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; } -vqshlq_s8_(a, b) +vmaxq_s32_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")] - fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i8")] + fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; } -vqshl_s16_(a, b) +vmax_u8_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")] - fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")] + fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; } -vqshlq_s16_(a, b) +vmaxq_u8_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")] - fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i16")] + fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; } -vqshl_s32_(a, b) +vmax_u16_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")] - fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i16")] + fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; } -vqshlq_s32_(a, b) +vmaxq_u16_(a, b) } -/// Signed saturating shift left +/// Maximum 
(vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] - fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v2i32")] + fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; } -vqshl_s64_(a, b) +vmax_u32_(a, b) } -/// Signed saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] -pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] - fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i32")] + fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; } -vqshlq_s64_(a, b) +vmaxq_u32_(a, b) } -/// Unsigned saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] +pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] - fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f32")] + fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; } -vqshl_u8_(a, b) +vmax_f32_(a, b) } -/// Unsigned saturating shift left +/// Maximum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn 
vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] +pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] - fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")] + fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; } -vqshlq_u8_(a, b) +vmaxq_f32_(a, b) } -/// Unsigned saturating shift left +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")] - fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] + fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; } -vqshl_u16_(a, b) +vmaxnm_f32_(a, b) } -/// Unsigned saturating shift left +/// Floating-point Maximum Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")] - fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] + fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; } -vqshlq_u16_(a, b) +vmaxnmq_f32_(a, b) } -/// Unsigned saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshl_u32(a: 
uint32x2_t, b: int32x2_t) -> uint32x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")] - fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i8")] + fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; } -vqshl_u32_(a, b) +vmin_s8_(a, b) } -/// Unsigned saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")] - fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v16i8")] + fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; } -vqshlq_u32_(a, b) +vminq_s8_(a, b) } -/// Unsigned saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[allow(improper_ctypes)] extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")] - fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i16")] + fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; } -vqshl_u64_(a, b) +vmin_s16_(a, b) } -/// Unsigned saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] -pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[allow(improper_ctypes)] extern "C" { - 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")] - fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i16")] + fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; } -vqshlq_u64_(a, b) +vminq_s16_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { - static_assert_imm3!(N); - vqshl_s8(a, vdup_n_s8(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v2i32")] + fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vmin_s32_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { - static_assert_imm3!(N); - vqshlq_s8(a, vdupq_n_s8(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i32")] + fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vminq_s32_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { - static_assert_imm4!(N); - vqshl_s16(a, vdup_n_s16(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i8")] + fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vmin_u8_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { - static_assert_imm4!(N); - vqshlq_s16(a, vdupq_n_s16(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v16i8")] + fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vminq_u8_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { - static_assert_imm5!(N); - vqshl_s32(a, vdup_n_s32(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i16")] + fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vmin_u16_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { - static_assert_imm5!(N); - vqshlq_s32(a, vdupq_n_s32(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i16")] + fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vminq_u16_(a, b) } -/// Signed saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { - static_assert_imm6!(N); - vqshl_s64(a, vdup_n_s64(N.try_into().unwrap())) -} - -/// Signed saturating shift left +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vminu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v2i32")] + fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vmin_u32_(a, b) +} + +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { - static_assert_imm6!(N); - vqshlq_s64(a, vdupq_n_s64(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i32")] + fn vminq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vminq_u32_(a, b) } -/// Unsigned saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { - static_assert_imm3!(N); - vqshl_u8(a, vdup_n_s8(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] +pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f32")] + fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmin_f32_(a, b) } -/// Unsigned saturating shift left +/// Minimum (vector) #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { - static_assert_imm3!(N); - vqshlq_u8(a, vdupq_n_s8(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] +pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")] + fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminq_f32_(a, b) } -/// Unsigned saturating shift left +/// Floating-point Minimun Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { - 
static_assert_imm4!(N); - vqshl_u16(a, vdup_n_s16(N.try_into().unwrap())) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] + fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vminnm_f32_(a, b) } -/// Unsigned saturating shift left +/// Floating-point Minimun Number (vector) #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { - static_assert_imm4!(N); - vqshlq_u16(a, vdupq_n_s16(N.try_into().unwrap())) +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] + fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminnmq_f32_(a, b) } -/// Unsigned saturating shift left +/// Signed saturating doubling multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { - static_assert_imm5!(N); - vqshl_u32(a, vdup_n_s32(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")] + fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; + } +vqdmull_s16_(a, b) } -/// Unsigned saturating shift left +/// Signed saturating doubling multiply long #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert_imm5!(N); - vqshlq_u32(a, vdupq_n_s32(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")] + fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; + } +vqdmull_s32_(a, b) } -/// Unsigned saturating shift left +/// Vector saturating doubling long multiply with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert_imm6!(N); - vqshl_u64(a, vdup_n_s64(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vqdmull_s16(a, vdup_n_s16(b)) } -/// Unsigned saturating shift left +/// Vector saturating doubling long multiply with scalar #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert_imm6!(N); - vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))] +pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vqdmull_s32(a, vdup_n_s32(b)) } -/// Signed saturating shift right narrow +/// Vector saturating doubling long multiply by scalar #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")] - fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; - } -vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]); + vqdmull_s16(a, b) } -/// Signed saturating shift right narrow +/// Vector saturating doubling long multiply by scalar #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")] - fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; - } 
-/// Signed saturating shift right narrow
+/// Vector saturating doubling long multiply by scalar
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
-        fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
 }

-/// Signed saturating shift right narrow
+/// Vector saturating doubling long multiply by scalar
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")]
-        fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-vqshrn_n_s16_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 1))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
 }

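The `_lane_` forms take the multiplier from one lane of a vector, selected by a const generic; `rustc_legacy_const_generics(2)` additionally keeps the older three-argument call style working. A sketch, not part of the patch (the `transmute` to build a vector from an array is just for brevity here):

```rust
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vdup_n_s16(10);
        let b: int16x4_t = core::mem::transmute([1i16, 2, 3, 4]);
        // Every lane of the result is 2 * 10 * b[3] = 80.
        let r = vqdmull_lane_s16::<3>(a, b);
        assert_eq!(vgetq_lane_s32::<0>(r), 80);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```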
"aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_s64(a: int64x2_t) -> int32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")] - fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; - } -vqshrn_n_s64_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))] +pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqaddq_s64(a, vqdmull_n_s32(b, c)) } -/// Unsigned saturating shift right narrow +/// Vector widening saturating doubling multiply accumulate with scalar #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] - fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; - } -vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16)) +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlal_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + vqaddq_s32(a, vqdmull_lane_s16::(b, c)) } -/// Unsigned saturating shift right narrow +/// Vector widening saturating doubling multiply accumulate with scalar #[inline] -#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")] - fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; - } -vqshrn_n_u16_(a, N) +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))] +#[rustc_legacy_const_generics(3)] +pub unsafe fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(N); + vqaddq_s64(a, vqdmull_lane_s32::(b, c)) } -/// Unsigned saturating shift right narrow +/// Signed saturating doubling multiply-subtract long #[inline] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - #[allow(improper_ctypes)] - extern "C" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] - fn vqshrn_n_u32_(a: 
-/// Unsigned saturating shift right narrow
+/// Vector widening saturating doubling multiply accumulate with scalar
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
-        fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
-    }
-vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
 }

-/// Unsigned saturating shift right narrow
+/// Vector widening saturating doubling multiply accumulate with scalar
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")]
-        fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
-    }
-vqshrn_n_u16_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
 }

-/// Unsigned saturating shift right narrow
+/// Signed saturating doubling multiply-subtract long
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
-        fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
-    }
-vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
+pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_s16(b, c))
 }

-/// Unsigned saturating shift right narrow
+/// Signed saturating doubling multiply-subtract long
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")]
-        fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
-    }
-vqshrn_n_u32_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
+pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_s32(b, c))
 }

-/// Unsigned saturating shift right narrow
+/// Vector widening saturating doubling multiply subtract with scalar
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
-        fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
-    }
-vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
+pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_n_s16(b, c))
 }

-/// Unsigned saturating shift right narrow
+/// Vector widening saturating doubling multiply subtract with scalar
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")]
-        fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
-    }
-vqshrn_n_u64_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
+pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_n_s32(b, c))
 }

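`vqdmlsl` is the mirror image: `a - 2*b*c` via `vqsubq_*`. Sketch, not part of the patch:

```rust
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let acc = vdupq_n_s32(100);
        // 100 - 2 * 3 * 4 = 76 in every lane.
        let r = vqdmlsl_s16(acc, vdup_n_s16(3), vdup_n_s16(4));
        assert_eq!(vgetq_lane_s32::<0>(r), 76);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```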
-/// Signed saturating shift right unsigned narrow
+/// Vector widening saturating doubling multiply subtract with scalar
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
-        fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
-    }
-vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 2))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
 }

-/// Signed saturating shift right unsigned narrow
+/// Vector widening saturating doubling multiply subtract with scalar
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")]
-        fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
-    }
-vqshrun_n_s16_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 1))]
+#[rustc_legacy_const_generics(3)]
+pub unsafe fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
 }

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
-        fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")]
+        fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-vqshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+vqdmulh_s16_(a, b)
 }

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")]
-        fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")]
+        fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-vqshrun_n_s32_(a, N)
+vqdmulhq_s16_(a, b)
 }

-/// Signed saturating shift right unsigned narrow
+/// Signed saturating doubling multiply returning high half
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
-        fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")]
+        fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+vqdmulh_s32_(a, b)
 }

-/// Signed saturating shift right unsigned narrow
-#[inline]
-#[cfg(target_arch = "aarch64")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")]
-        fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
-    }
-vqshrun_n_s64_(a, N)
-}
-
-/// Reciprocal square-root estimate.
+/// Signed saturating doubling multiply returning high half
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
-pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
-        fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")]
+        fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-vrsqrte_f32_(a)
+vqdmulhq_s32_(a, b)
 }

-/// Reciprocal square-root estimate.
+/// Vector saturating doubling multiply high with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
-pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v4f32")]
-        fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t;
-    }
-vrsqrteq_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmulh_s16(a, b)
 }

-/// Reciprocal estimate.
+/// Vector saturating doubling multiply high with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
-pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")]
-        fn vrecpe_f32_(a: float32x2_t) -> float32x2_t;
-    }
-vrecpe_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmulh_s32(a, b)
 }

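`vqdmulh` is the classic fixed-point (Q15/Q31) multiply: it keeps the high half of `2*a*b`, i.e. `(2*a*b) >> 16` for 16-bit lanes, saturating only for `MIN * MIN`. Sketch using the broadcast-scalar form, not part of the patch:

```rust
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        let half = vdup_n_s16(16384); // 0.5 in Q15
        // 0.5 * 0.5 = 0.25: (2 * 16384 * 16384) >> 16 = 8192.
        let r = vqdmulh_n_s16(half, 16384);
        assert_eq!(vget_lane_s16::<0>(r), 8192);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```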
-/// Reciprocal estimate.
+/// Vector saturating doubling multiply high with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
-pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")]
-        fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t;
-    }
-vrecpeq_f32_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulhq_nq_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    let b: int16x8_t = vdupq_n_s16(b);
+    vqdmulhq_s16(a, b)
 }

-/// Vector reinterpret cast operation
+/// Vector saturating doubling multiply high with scalar
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+pub unsafe fn vqdmulhq_nq_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    let b: int32x4_t = vdupq_n_s32(b);
+    vqdmulhq_s32(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")]
+        fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vqrshl_s8_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")]
+        fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vqrshlq_s8_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")]
+        fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vqrshl_s16_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")]
+        fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vqrshlq_s16_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")]
+        fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vqrshl_s32_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")]
+        fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vqrshlq_s32_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")]
+        fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vqrshl_s64_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")]
+        fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vqrshlq_s64_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")]
+        fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+vqrshl_u8_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")]
+        fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+vqrshlq_u8_(a, b)
 }

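For the shift-by-vector intrinsics the count is a signed per-lane value: a positive count shifts left (saturating), a negative count shifts right with rounding. Sketch, not part of the patch:

```rust
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        // Negative shift: rounding right shift, (5 + 1) >> 1 = 3.
        let r = vqrshl_s8(vdup_n_s8(5), vdup_n_s8(-1));
        assert_eq!(vget_lane_s8::<0>(r), 3);
        // Positive shift: 200 << 1 = 400 saturates to u8::MAX.
        let u = vqrshl_u8(vdup_n_u8(200), vdup_n_s8(1));
        assert_eq!(vget_lane_u8::<0>(u), 255);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```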
-/// Vector reinterpret cast operation
+/// Unsigned saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
-    transmute(a)
-}
-
-/// Vector reinterpret cast operation
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")]
+        fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+vqrshl_u16_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")]
+        fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+vqrshlq_u16_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
+pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")]
+        fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+vqrshl_u32_(a, b)
 }

"arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")] + fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vqrshlq_u32_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")] + fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vqrshl_u64_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")] + fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vqrshlq_u64_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { - transmute(a) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] + fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } +vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")]
+        fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+vqrshrn_n_s16_(a, N)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+vqrshrn_n_s32_(a, N)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
 }

+#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")] + fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; + } +vqrshrn_n_s64_(a, N) } -/// Vector reinterpret cast operation +/// Unsigned signed saturating rounded shift right narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { - transmute(a) +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] + fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; + } +vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16)) } -/// Vector reinterpret cast operation +/// Unsigned signed saturating rounded shift right narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")] + fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t; + } +vqrshrn_n_u16_(a, N) } -/// Vector reinterpret cast operation +/// Unsigned signed saturating rounded shift right narrow #[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { - transmute(a) -} +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] + fn vqrshrn_n_u32_(a: 
-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
-    transmute(a)
-}
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
+}

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+vqrshrn_n_u32_(a, N)
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
 }

-/// Vector reinterpret cast operation
+/// Unsigned saturating rounded shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, N)
 }

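`vqrshrn_n` shifts right by an immediate with rounding, then narrows with saturation; note that the ARM lowering passes the count as a vector of `-N` to the `vqrshiftn*` LLVM intrinsics, while the AArch64 ones take the immediate directly. Sketch, not part of the patch:

```rust
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        // round(1000 / 2) = 500 does not fit in i8, so the lane saturates.
        let sat = vqrshrn_n_s16::<1>(vdupq_n_s16(1000));
        assert_eq!(vget_lane_s8::<0>(sat), i8::MAX);
        // (1000 + 8) >> 4 = 63 fits, so no saturation here.
        let ok = vqrshrn_n_s16::<4>(vdupq_n_s16(1000));
        assert_eq!(vget_lane_s8::<0>(ok), 63);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```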
-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
+        fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")]
+        fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+vqrshrun_n_s16_(a, N)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
+        fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 }

-/// Vector reinterpret cast operation
+/// Signed saturating rounded shift right unsigned narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")]
+        fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+vqrshrun_n_s32_(a, N)
 }

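The `un` variants narrow a signed input to an unsigned result, so negative values clamp to zero. Sketch, not part of the patch:

```rust
#[cfg(target_arch = "aarch64")]
fn main() {
    use core::arch::aarch64::*;
    unsafe {
        // Negative inputs clamp to 0 in the unsigned result.
        let neg = vqrshrun_n_s16::<1>(vdupq_n_s16(-5));
        assert_eq!(vget_lane_u8::<0>(neg), 0);
        // round(300 / 2) = 150 fits in u8.
        let pos = vqrshrun_n_s16::<1>(vdupq_n_s16(300));
        assert_eq!(vget_lane_u8::<0>(pos), 150);
    }
}
#[cfg(not(target_arch = "aarch64"))]
fn main() {}
```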
"arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")] + fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } +vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) } -/// Vector reinterpret cast operation +/// Signed saturating rounded shift right unsigned narrow #[inline] +#[cfg(target_arch = "aarch64")] #[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")] + fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; + } +vqrshrun_n_s64_(a, N) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")] + fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqshl_s8_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] + fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqshlq_s8_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))]
+pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")]
+        fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vqshl_s16_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))]
+pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")]
+        fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vqshlq_s16_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
-    transmute(a)
-}
-
-/// Vector reinterpret cast operation
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))]
+pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")]
+        fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vqshl_s32_(a, b)
 }

-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))]
+pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")]
+        fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vqshlq_s32_(a, b)
 }

reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] + fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqshl_s64_(a, b) } -/// Vector reinterpret cast operation +/// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] + fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqshlq_s64_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] + fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vqshl_u8_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] + fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vqshlq_u8_(a, b) } -/// Vector 
reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")] + fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vqshl_u16_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")] + fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vqshlq_u16_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")] + fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vqshl_u32_(a, b) } -/// Vector reinterpret cast operation +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { - transmute(a) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")] + fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } 
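[Editorial aside, not part of the patch: a minimal sketch of the by-vector saturating shift above, assuming an AArch64 target and that these intrinsics are re-exported from `core::arch::aarch64`; the transmute to an array is only for inspecting lanes.]

    #[cfg(target_arch = "aarch64")]
    unsafe fn vqshl_demo() {
        use core::arch::aarch64::*;
        let a = vdup_n_s16(16384);
        let b = vdup_n_s16(2); // per-lane shift amounts; negative values shift right
        let r = vqshl_s16(a, b); // 16384 << 2 overflows i16, so every lane saturates
        let lanes: [i16; 4] = core::mem::transmute(r);
        assert_eq!(lanes, [i16::MAX; 4]);
    }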
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
+pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")]
+        fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+    vqshl_u64_(a, b)
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))]
+pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")]
+        fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+    vqshlq_u64_(a, b)
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_imm3!(N);
+    vqshl_s8(a, vdup_n_s8(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_imm3!(N);
+    vqshlq_s8(a, vdupq_n_s8(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_imm4!(N);
+    vqshl_s16(a, vdup_n_s16(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_imm4!(N);
+    vqshlq_s16(a, vdupq_n_s16(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_imm5!(N);
+    vqshl_s32(a, vdup_n_s32(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
-    transmute(a)
-}
-
-/// Vector reinterpret cast operation
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_imm5!(N);
+    vqshlq_s32(a, vdupq_n_s32(N.try_into().unwrap()))
 }
 
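[Editorial aside, not part of the patch: the `_n_` forms take the shift as a const generic, so the `static_assert_imm*!` range check happens at compile time rather than at run time. A hypothetical use, assuming the AArch64 re-exports:]

    #[cfg(target_arch = "aarch64")]
    unsafe fn vqshl_n_demo() {
        use core::arch::aarch64::*;
        // N must fit in 5 bits (0..=31) for 32-bit lanes; an out-of-range N
        // is rejected by the compiler, not by a runtime panic.
        let a = vdup_n_s32(-3_000_000);
        let r = vqshl_n_s32::<3>(a); // -3_000_000 << 3 = -24_000_000, no saturation
        let lanes: [i32; 2] = core::mem::transmute(r);
        assert_eq!(lanes, [-24_000_000; 2]);
    }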
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_imm6!(N);
+    vqshl_s64(a, vdup_n_s64(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_imm6!(N);
+    vqshlq_s64(a, vdupq_n_s64(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    vqshl_u8(a, vdup_n_s8(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_imm3!(N);
+    vqshlq_u8(a, vdupq_n_s8(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_imm4!(N);
+    vqshl_u16(a, vdup_n_s16(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_imm4!(N);
+    vqshlq_u16(a, vdupq_n_s16(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_imm5!(N);
+    vqshl_u32(a, vdup_n_s32(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_imm5!(N);
+    vqshlq_u32(a, vdupq_n_s32(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_imm6!(N);
+    vqshl_u64(a, vdup_n_s64(N.try_into().unwrap()))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_imm6!(N);
+    vqshlq_u64(a, vdupq_n_s64(N.try_into().unwrap()))
 }
 
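[Editorial aside, not part of the patch: note that even the unsigned `_n_` forms build their shift-amount vector with the signed `vdup_n_s8`-style splats above, because the underlying instruction takes signed per-lane shifts. A hypothetical AArch64 use showing unsigned saturation:]

    #[cfg(target_arch = "aarch64")]
    unsafe fn vqshl_n_u8_demo() {
        use core::arch::aarch64::*;
        let a = vdup_n_u8(200);
        let r = vqshl_n_u8::<2>(a); // 200 << 2 = 800 exceeds u8::MAX, lanes clamp
        let lanes: [u8; 8] = core::mem::transmute(r);
        assert_eq!(lanes, [u8::MAX; 8]);
    }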
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
+        fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")]
+        fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+    vqshrn_n_s16_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
+        fn vqshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    vqshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v4i16")]
+        fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+    vqshrn_n_s32_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
+        fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    vqshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")]
+        fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+    vqshrn_n_s64_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
+        fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+    vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")]
+        fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+    vqshrn_n_u16_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
+        fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+    vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")]
+        fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+    vqshrn_n_u32_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
+        fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+    vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
 }
 
-/// Vector reinterpret cast operation
+/// Unsigned saturating shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")]
+        fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+    vqshrn_n_u64_(a, N)
 }
 
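[Editorial aside, not part of the patch: a hypothetical AArch64 use of the shift-right-narrow family, showing that the narrowing step saturates when the shifted value does not fit the destination lane:]

    #[cfg(target_arch = "aarch64")]
    unsafe fn vqshrn_demo() {
        use core::arch::aarch64::*;
        let a = vdupq_n_s16(1000);
        let r = vqshrn_n_s16::<2>(a); // 1000 >> 2 = 250 does not fit in i8,
        let lanes: [i8; 8] = core::mem::transmute(r);
        assert_eq!(lanes, [i8::MAX; 8]); // so every narrowed lane clamps to 127
    }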
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
+        fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+    vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")]
+        fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+    vqshrun_n_s16_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
-    transmute(a)
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
+        fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+    vqshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")]
+        fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+    vqshrun_n_s32_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
+        fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+    vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating shift right unsigned narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")]
+        fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+    vqshrun_n_s64_(a, N)
 }
 
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
+pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")]
+        fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrsqrte_f32_(a)
 }
 
-/// Vector reinterpret cast operation
+/// Reciprocal square-root estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))]
+pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v4f32")]
+        fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrsqrteq_f32_(a)
 }
 
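[Editorial aside, not part of the patch: the reciprocal square-root estimate is deliberately low precision (on the order of 8 bits); callers typically refine it with Newton-Raphson steps when accuracy matters. A hypothetical AArch64 use:]

    #[cfg(target_arch = "aarch64")]
    unsafe fn vrsqrte_demo() {
        use core::arch::aarch64::*;
        let a = vdup_n_f32(4.0);
        let e = vrsqrte_f32(a); // rough estimate of 1/sqrt(4.0) = 0.5
        let lanes: [f32; 2] = core::mem::transmute(e);
        assert!((lanes[0] - 0.5).abs() < 0.01);
    }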
-/// Vector reinterpret cast operation
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
+pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")]
+        fn vrecpe_f32_(a: float32x2_t) -> float32x2_t;
+    }
+    vrecpe_f32_(a)
 }
 
-/// Vector reinterpret cast operation
+/// Reciprocal estimate.
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
-    transmute(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))]
+pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")]
+        fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t;
+    }
+    vrecpeq_f32_(a)
 }
 
 /// Vector reinterpret cast operation
@@ -8514,7 +8556,7 @@ pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+pub unsafe fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     transmute(a)
 }
 
@@ -8524,7 +8566,7 @@ pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+pub unsafe fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     transmute(a)
 }
 
@@ -8534,7 +8576,7 @@ pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub unsafe fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     transmute(a)
 }
 
@@ -8544,7 +8586,7 @@ pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+pub unsafe fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     transmute(a)
 }
 
@@ -8554,7 +8596,7 @@ pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+pub unsafe fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     transmute(a)
 }
 
@@ -8564,7 +8606,7 @@ pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
    transmute(a)
 }
 
@@ -8574,7 +8616,7 @@ pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -8584,7 +8626,7 @@ pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -8594,7 +8636,7 @@ pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     transmute(a)
 }
 
@@ -8604,7 +8646,7 @@ pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     transmute(a)
 }
 
@@ -8614,7 +8656,7 @@ pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     transmute(a)
 }
 
@@ -8624,7 +8666,7 @@ pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     transmute(a)
 }
 
@@ -8634,7 +8676,7 @@ pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     transmute(a)
 }
 
@@ -8644,7 +8686,7 @@ pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     transmute(a)
 }
 
@@ -8654,7 +8696,7 @@ pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     transmute(a)
 }
 
@@ -8664,7 +8706,7 @@ pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     transmute(a)
 }
 
@@ -8674,7 +8716,7 @@ pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     transmute(a)
 }
 
@@ -8684,7 +8726,7 @@ pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     transmute(a)
 }
 
@@ -8694,7 +8736,7 @@ pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     transmute(a)
 }
 
@@ -8704,7 +8746,7 @@ pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     transmute(a)
 }
 
@@ -8714,7 +8756,7 @@ pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     transmute(a)
 }
 
@@ -8724,7 +8766,7 @@ pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     transmute(a)
 }
 
@@ -8734,7 +8776,7 @@ pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     transmute(a)
 }
 
@@ -8744,7 +8786,7 @@ pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     transmute(a)
 }
 
@@ -8754,7 +8796,7 @@ pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     transmute(a)
 }
 
@@ -8764,7 +8806,7 @@ pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     transmute(a)
 }
 
@@ -8774,7 +8816,7 @@ pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     transmute(a)
 }
 
@@ -8784,7 +8826,7 @@ pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
     transmute(a)
 }
 
@@ -8794,7 +8836,7 @@ pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
     transmute(a)
 }
 
@@ -8804,7 +8846,7 @@ pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { transmute(a) } @@ -8814,7 +8856,7 @@ pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { transmute(a) } @@ -8824,7 +8866,7 @@ pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { +pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { transmute(a) } @@ -8834,7 +8876,7 @@ pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { +pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { transmute(a) } @@ -8844,7 +8886,7 @@ pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { +pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { transmute(a) } @@ -8854,7 +8896,7 @@ pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { +pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { transmute(a) } @@ -8864,7 +8906,7 @@ pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { +pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { transmute(a) } @@ -8874,7 +8916,7 @@ pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { +pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { transmute(a) } @@ -8884,7 +8926,7 @@ pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] 
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     transmute(a)
 }
 
@@ -8894,7 +8936,7 @@ pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     transmute(a)
 }
 
@@ -8904,7 +8946,7 @@ pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -8914,7 +8956,7 @@ pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -8924,7 +8966,7 @@ pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     transmute(a)
 }
 
@@ -8934,7 +8976,7 @@ pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     transmute(a)
 }
 
@@ -8944,7 +8986,7 @@ pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     transmute(a)
 }
 
@@ -8954,7 +8996,7 @@ pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     transmute(a)
 }
 
@@ -8964,7 +9006,7 @@ pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     transmute(a)
 }
 
@@ -8974,7 +9016,7 @@ pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     transmute(a)
 }
 
@@ -8984,7 +9026,7 @@ pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     transmute(a)
 }
 
@@ -8994,7 +9036,7 @@ pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     transmute(a)
 }
 
@@ -9004,7 +9046,7 @@ pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
    transmute(a)
 }
 
@@ -9014,7 +9056,7 @@ pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     transmute(a)
 }
 
@@ -9024,7 +9066,7 @@ pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     transmute(a)
 }
 
@@ -9034,7 +9076,7 @@ pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     transmute(a)
 }
 
@@ -9044,7 +9086,7 @@ pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     transmute(a)
 }
 
@@ -9054,7 +9096,7 @@ pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     transmute(a)
 }
 
@@ -9064,7 +9106,7 @@ pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     transmute(a)
 }
 
@@ -9074,7 +9116,7 @@ pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     transmute(a)
 }
 
@@ -9084,7 +9126,7 @@ pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     transmute(a)
 }
 
@@ -9094,7 +9136,7 @@ pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     transmute(a)
 }
 
@@ -9104,7 +9146,7 @@ pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     transmute(a)
 }
 
@@ -9114,7 +9156,7 @@ pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     transmute(a)
 }
 
@@ -9124,7 +9166,7 @@ pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     transmute(a)
 }
 
@@ -9134,7 +9176,7 @@ pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { transmute(a) } @@ -9144,7 +9186,7 @@ pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { transmute(a) } @@ -9154,7 +9196,7 @@ pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { transmute(a) } @@ -9164,7 +9206,7 @@ pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { +pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { transmute(a) } @@ -9174,7 +9216,7 @@ pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { +pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { transmute(a) } @@ -9184,7 +9226,7 @@ pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { +pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { transmute(a) } @@ -9194,7 +9236,7 @@ pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { +pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { transmute(a) } @@ -9204,7 +9246,7 @@ pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { +pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { transmute(a) } @@ -9214,7 +9256,7 @@ pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub 
unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { +pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { transmute(a) } @@ -9224,7 +9266,7 @@ pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { +pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { transmute(a) } @@ -9234,7 +9276,7 @@ pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { +pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { transmute(a) } @@ -9244,7 +9286,7 @@ pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { +pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { transmute(a) } @@ -9254,7 +9296,7 @@ pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { +pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { transmute(a) } @@ -9264,7 +9306,7 @@ pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { +pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { transmute(a) } @@ -9274,7 +9316,7 @@ pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { +pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { transmute(a) } @@ -9284,7 +9326,7 @@ pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { +pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { transmute(a) } @@ -9294,7 +9336,7 @@ pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { +pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { transmute(a) } @@ 
-9304,7 +9346,7 @@ pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { +pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { transmute(a) } @@ -9314,7 +9356,7 @@ pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { transmute(a) } @@ -9324,7 +9366,7 @@ pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { transmute(a) } @@ -9334,7 +9376,7 @@ pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { transmute(a) } @@ -9344,7 +9386,7 @@ pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { +pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { transmute(a) } @@ -9354,7 +9396,7 @@ pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { +pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { transmute(a) } @@ -9364,7 +9406,7 @@ pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { +pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { transmute(a) } @@ -9374,7 +9416,7 @@ pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { +pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { transmute(a) } @@ -9384,7 +9426,7 @@ pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = 
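All of the casts in this run follow the NEON naming scheme `vreinterpret_DST_SRC` for 64-bit vectors and `vreinterpretq_DST_SRC` for 128-bit vectors: lane count times lane width is identical on both sides of every signature, so each cast merely relabels the lanes. A minimal usage sketch (illustrative only, not part of the generated file; it assumes a nightly toolchain where these intrinsics are available):

use core::arch::aarch64::*;

#[target_feature(enable = "neon")]
unsafe fn halfwords_as_bytes() {
    // 4 lanes x 16 bits reinterpreted as 8 lanes x 8 bits: still 64 bits total.
    let halves: uint16x4_t = vdup_n_u16(0x0102);
    let bytes: uint8x8_t = vreinterpret_u8_u16(halves);
    // AArch64 lane order is little-endian: the low byte of each u16 comes first.
    assert_eq!(vget_lane_u8::<0>(bytes), 0x02);
    assert_eq!(vget_lane_u8::<1>(bytes), 0x01);
}
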
"v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { +pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { transmute(a) } @@ -9394,7 +9436,7 @@ pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { +pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { transmute(a) } @@ -9404,7 +9446,7 @@ pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { +pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { transmute(a) } @@ -9414,7 +9456,7 @@ pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { +pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { transmute(a) } @@ -9424,7 +9466,7 @@ pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { +pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { transmute(a) } @@ -9434,7 +9476,7 @@ pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { +pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { transmute(a) } @@ -9444,7 +9486,7 @@ pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { +pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { transmute(a) } @@ -9454,7 +9496,7 @@ pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { +pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { transmute(a) } @@ -9464,7 +9506,7 @@ pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(str))] -pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { +pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { transmute(a) } @@ -9474,7 +9516,7 @@ pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { +pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { transmute(a) } @@ -9484,7 +9526,7 @@ pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { +pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { transmute(a) } @@ -9494,7 +9536,7 @@ pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { +pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { transmute(a) } @@ -9504,7 +9546,7 @@ pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { +pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { transmute(a) } @@ -9514,7 +9556,7 @@ pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { +pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { transmute(a) } @@ -9524,7 +9566,7 @@ pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { +pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { transmute(a) } @@ -9534,7 +9576,7 @@ pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { +pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { transmute(a) } @@ -9544,7 +9586,7 @@ pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { 
+pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { transmute(a) } @@ -9554,7 +9596,7 @@ pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { +pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { transmute(a) } @@ -9564,7 +9606,7 @@ pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { +pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { transmute(a) } @@ -9574,7 +9616,7 @@ pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { +pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { transmute(a) } @@ -9584,7 +9626,7 @@ pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { +pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { transmute(a) } @@ -9594,7 +9636,7 @@ pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { +pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { transmute(a) } @@ -9604,7 +9646,7 @@ pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { +pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { transmute(a) } @@ -9614,7 +9656,7 @@ pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { +pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { transmute(a) } @@ -9624,7 +9666,7 @@ pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { +pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { transmute(a) } @@ -9634,7 +9676,7 @@ pub unsafe 
fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { +pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { transmute(a) } @@ -9644,7 +9686,7 @@ pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { +pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { transmute(a) } @@ -9654,7 +9696,7 @@ pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { +pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { transmute(a) } @@ -9664,7 +9706,7 @@ pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { +pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { transmute(a) } @@ -9674,7 +9716,7 @@ pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { +pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { transmute(a) } @@ -9684,7 +9726,7 @@ pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { +pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { transmute(a) } @@ -9694,7 +9736,7 @@ pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { +pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { transmute(a) } @@ -9704,7 +9746,7 @@ pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { +pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { transmute(a) } @@ -9714,7 +9756,7 @@ pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { +pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { transmute(a) } @@ -9724,7 +9766,7 @@ pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { transmute(a) } @@ -9734,7 +9776,7 @@ pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { transmute(a) } @@ -9744,7 +9786,7 @@ pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { +pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { transmute(a) } @@ -9754,7 +9796,7 @@ pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { +pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { transmute(a) } @@ -9764,7 +9806,7 @@ pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { +pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { transmute(a) } @@ -9774,7 +9816,7 @@ pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { +pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { transmute(a) } @@ -9784,7 +9826,7 @@ pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] -pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { +pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { transmute(a) } @@ -9794,4630 +9836,6260 @@ pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] 
@@ -9794,4630 +9836,6260 @@ pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
-pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")]
-        fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vrshl_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")]
-        fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vrshlq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")]
-        fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vrshl_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")]
-        fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vrshlq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")]
-        fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vrshl_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")]
-        fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vrshlq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")]
-        fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-vrshl_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+    transmute(a)
 }
 
-/// Signed rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
-pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")]
-        fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-vrshlq_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")]
-        fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
-    }
-vrshl_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")]
-        fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    }
-vrshlq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")]
-        fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    }
-vrshl_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")]
-        fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    }
-vrshlq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")]
-        fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    }
-vrshl_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")]
-        fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    }
-vrshlq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")]
-        fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    }
-vrshl_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
-pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")]
-        fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-vrshlq_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshl_s8(a, vdup_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshlq_s8(a, vdupq_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshl_s16(a, vdup_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshlq_s16(a, vdupq_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshl_s32(a, vdup_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshlq_s32(a, vdupq_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshl_s64(a, vdup_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+    transmute(a)
 }
 
-/// Signed rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshlq_s64(a, vdupq_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
++#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshl_u8(a, vdup_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    vrshlq_u8(a, vdupq_n_s8((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshl_u16(a, vdup_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    vrshlq_u16(a, vdupq_n_s16((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshl_u32(a, vdup_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    vrshlq_u32(a, vdupq_n_s32((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshl_u64(a, vdup_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    transmute(a)
 }
 
-/// Unsigned rounding shift right
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    vrshlq_u64(a, vdupq_n_s64((-N).try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
-        fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")]
-        fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-vrshrn_n_s16_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
-        fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")]
-        fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-vrshrn_n_s32_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
-        fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
-#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")]
-        fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-vrshrn_n_s64_(a, N)
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+    transmute(a)
 }
 
-/// Rounding shift right narrow
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - transmute(vrshrn_n_s16::(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + transmute(a) } -/// Rounding shift right narrow +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - transmute(vrshrn_n_s32::(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + transmute(a) } -/// Rounding shift right narrow +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - transmute(vrshrn_n_s64::(transmute(a))) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_add(a, vrshr_n_s8::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_add(a, vrshrq_n_s8::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed 
rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - simd_add(a, vrshr_n_s16::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - simd_add(a, vrshrq_n_s16::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_add(a, vrshr_n_s32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_add(a, vrshrq_n_s32::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { - static_assert!(N : i32 where N >= 1 && N <= 64); - simd_add(a, vrshr_n_s64::(b)) +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + transmute(a) } -/// Signed rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - static_assert!(N : i32 where N >= 1 && N <= 64); - simd_add(a, vrshrq_n_s64::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + transmute(a) } -/// Unsigned rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_add(a, vrshr_n_u8::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + transmute(a) } -/// Unsigned rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_add(a, vrshrq_n_u8::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + transmute(a) } -/// Unsigned rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - static_assert!(N : i32 where N >= 1 && N <= 16); - simd_add(a, vrshr_n_u16::(b)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))] +pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + transmute(a) } -/// Unsigned rounding shift right and accumulate +/// Vector reinterpret cast operation #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] 
-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vrshrq_n_u16::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
+    transmute(a)
 }

-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vrshr_n_u32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
+    transmute(a)
 }

-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vrshrq_n_u32::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+    transmute(a)
 }

-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vrshr_n_u64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+    transmute(a)
 }

-/// Unsigned rounding shift right and accumulate
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vrshrq_n_u64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+    transmute(a)
 }
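(Editorial aside.) The `vrsra_n_*` definitions removed above implement "rounding shift right and accumulate" as `simd_add(a, vrshr_n_*::<N>(b))`, where the rounding shift adds `1 << (N - 1)` before shifting. A scalar model of one lane, with an illustrative name and in-range `N` only:

```rust
/// Sketch of vrsra_n_s16 per lane: a + round_shift_right(b, N).
fn rsra_n_s16<const N: i32>(a: i16, b: i16) -> i16 {
    assert!(N >= 1 && N <= 16); // the real code checks this at compile time
    let rounded = ((b as i32 + (1 << (N - 1))) >> N) as i16;
    a.wrapping_add(rounded)
}

fn main() {
    // b = 7, N = 2: (7 + 2) >> 2 == 2 (a plain shift would give 1); plus a == 1.
    assert_eq!(rsra_n_s16::<2>(1, 7), 3);
}
```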
-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i8")]
-        fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-vshl_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+    transmute(a)
 }

-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v16i8")]
-        fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-vshlq_s8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
+    transmute(a)
 }

-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")]
-        fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-vshl_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
+    transmute(a)
 }

-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")]
-        fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-vshlq_s16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+    transmute(a)
 }
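(Editorial aside.) The `vshl_*` functions removed above and below are the register-operand shifts (`sshl`/`ushl`): each lane of `a` is shifted by the signed amount in the corresponding lane of `b`, and a negative amount shifts right instead. A scalar sketch for one signed 8-bit lane, in-range shifts only; the hardware also defines the out-of-range cases:

```rust
/// Sketch of vshl_s8 per lane: positive b shifts left, negative b
/// shifts right arithmetically (truncating, no rounding).
fn sshl_lane(a: i8, b: i8) -> i8 {
    assert!(-8 < b && b < 8, "sketch models in-range shifts only");
    if b >= 0 {
        a.wrapping_shl(b as u32)
    } else {
        a >> ((-b) as u32)
    }
}

fn main() {
    assert_eq!(sshl_lane(3, 2), 12);    // 3 << 2
    assert_eq!(sshl_lane(-16, -2), -4); // arithmetic shift right
}
```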
-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")]
-        fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-vshl_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+    transmute(a)
 }

-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")]
-        fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-vshlq_s32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+    transmute(a)
 }

-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")]
-        fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-vshl_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+    transmute(a)
 }

-/// Signed Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
-pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")]
-        fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-vshlq_s64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")]
-        fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
-    }
-vshl_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")]
-        fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    }
-vshlq_u8_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")]
-        fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    }
-vshl_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")]
-        fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    }
-vshlq_u16_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")]
-        fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    }
-vshl_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")]
-        fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    }
-vshlq_u32_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")]
-        fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    }
-vshl_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+    transmute(a)
 }

-/// Unsigned Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
-pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")]
-        fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-vshlq_u64_(a, b)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+    transmute(a)
 }
-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdup_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdupq_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdup_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdupq_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdup_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdup_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_imm3!(N);
-    simd_shl(a, vdupq_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdup_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_imm4!(N);
-    simd_shl(a, vdupq_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+    transmute(a)
 }
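(Editorial aside.) The removed `vshl_n_*` bodies splat the const shift amount and apply an ordinary per-lane `<<`; the `static_assert_imm3!`/`imm4!`/`imm5!`/`imm6!` macros bound `N` to the lane width at compile time (0..=7 for 8-bit lanes, 0..=15 for 16-bit, 0..=31 for 32-bit, 0..=63 for 64-bit). A scalar stand-in with an illustrative name; the range check is shown at runtime for brevity:

```rust
/// Sketch of vshl_n_s8: shift every lane left by the const N.
fn shl_n_s8<const N: u32>(a: [i8; 8]) -> [i8; 8] {
    assert!(N < 8); // generated.rs enforces this at compile time
    a.map(|x| x.wrapping_shl(N))
}

fn main() {
    assert_eq!(
        shl_n_s8::<2>([1, 2, 3, 4, 5, 6, 7, 8]),
        [4, 8, 12, 16, 20, 24, 28, 32]
    );
}
```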
-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdup_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_imm5!(N);
-    simd_shl(a, vdupq_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdup_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdup_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+    transmute(a)
 }

-/// Shift left
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_imm6!(N);
-    simd_shl(a, vdupq_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+    transmute(a)
 }

-/// Signed shift left long
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 0 && N <= 8);
-    simd_shl(simd_cast(a), vdupq_n_s16(N.try_into().unwrap()))
-}
-
-/// Signed shift left long
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+    transmute(a)
+}
+
+/// Vector reinterpret cast operation
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    simd_shl(simd_cast(a), vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(str))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(str))]
+pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+    transmute(a)
 }

-/// Signed shift left long
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 0 && N <= 32);
-    simd_shl(simd_cast(a), vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")]
+        fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vrshl_s8_(a, b)
 }

-/// Signed shift left long
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 0 && N <= 8);
-    simd_shl(simd_cast(a), vdupq_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")]
+        fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vrshlq_s8_(a, b)
 }

-/// Signed shift left long
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
-    static_assert!(N : i32 where N >= 0 && N <= 16);
-    simd_shl(simd_cast(a), vdupq_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")]
+        fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vrshl_s16_(a, b)
 }

-/// Signed shift left long
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 0 && N <= 32);
-    simd_shl(simd_cast(a), vdupq_n_u64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")]
+        fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vrshlq_s16_(a, b)
 }

-/// Shift right
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")]
+        fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vrshl_s32_(a, b)
 }

-/// Shift right
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")]
+        fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vrshlq_s32_(a, b)
 }

-/// Shift right
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdup_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")]
+        fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vrshl_s64_(a, b)
 }

-/// Shift right
+/// Signed rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")]
+        fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vrshlq_s64_(a, b)
 }

-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdup_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")]
+        fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+vrshl_u8_(a, b)
 }

-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdupq_n_s32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")]
+        fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+vrshlq_u8_(a, b)
 }

-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_shr(a, vdup_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")]
+        fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+vrshl_u16_(a, b)
 }

-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_shr(a, vdupq_n_s64(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")]
+        fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+vrshlq_u16_(a, b)
 }
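(Editorial aside.) The `vrshl_*` functions added above are the rounding counterparts of `vshl_*`: positive per-lane amounts shift left as usual, while a negative amount `-s` shifts right with rounding, i.e. `1 << (s - 1)` is added before the right shift. A scalar sketch for one signed lane, in-range shifts only:

```rust
/// Sketch of vrshl_s8 per lane: left shifts are exact, right shifts round.
fn srshl_lane(a: i8, b: i8) -> i8 {
    assert!(-8 < b && b < 8, "sketch models in-range shifts only");
    if b >= 0 {
        a.wrapping_shl(b as u32)
    } else {
        let s = (-b) as u32;
        (((a as i16) + (1 << (s - 1))) >> s) as i8
    }
}

fn main() {
    assert_eq!(srshl_lane(7, -2), 2);   // (7 + 2) >> 2; truncating would give 1
    assert_eq!(srshl_lane(-6, -2), -1); // rounds -1.5 up, where vshl_s8 gives -2
}
```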
-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")]
+        fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+vrshl_u32_(a, b)
 }
 
-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_shr(a, vdupq_n_u8(N.try_into().unwrap()))
-}
-
-/// Shift right
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdup_n_u16(N.try_into().unwrap()))
-}
-
-/// Shift right
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_shr(a, vdupq_n_u16(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")]
+        fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+vrshlq_u32_(a, b)
 }
 
-/// Shift right
+/// Unsigned rounding shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
-#[rustc_legacy_const_generics(1)]
-pub unsafe fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_shr(a, vdup_n_u32(N.try_into().unwrap()))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")]
+        fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+vrshl_u64_(a, b)
 }
 
"neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_shr(a, vdupq_n_u32(N.try_into().unwrap())) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")] + fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vrshlq_u64_(a, b) } -/// Shift right +/// Signed rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { - static_assert!(N : i32 where N >= 1 && N <= 64); - simd_shr(a, vdup_n_u64(N.try_into().unwrap())) +pub unsafe fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + vrshl_s8(a, vdup_n_s8((-N).try_into().unwrap())) } -/// Shift right +/// Signed rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { - static_assert!(N : i32 where N >= 1 && N <= 64); - simd_shr(a, vdupq_n_u64(N.try_into().unwrap())) +pub unsafe fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + vrshlq_s8(a, vdupq_n_s8((-N).try_into().unwrap())) } -/// Shift right narrow +/// Signed rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] #[rustc_legacy_const_generics(1)] -pub unsafe fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { - static_assert!(N : i32 where N >= 1 && N <= 8); - simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))) +pub unsafe fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + vrshl_s16(a, vdup_n_s16((-N).try_into().unwrap())) } -/// Shift right narrow +/// Signed rounding shift right #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
-/// Shift right narrow
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_cast(simd_shr(a, vdupq_n_s32(N.try_into().unwrap())))
+    vrshlq_s16(a, vdupq_n_s16((-N).try_into().unwrap()))
 }
 
-/// Shift right narrow
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_cast(simd_shr(a, vdupq_n_s64(N.try_into().unwrap())))
+    vrshl_s32(a, vdup_n_s32((-N).try_into().unwrap()))
 }
 
-/// Shift right narrow
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_cast(simd_shr(a, vdupq_n_u16(N.try_into().unwrap())))
+pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshlq_s32(a, vdupq_n_s32((-N).try_into().unwrap()))
 }
 
-/// Shift right narrow
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_cast(simd_shr(a, vdupq_n_u32(N.try_into().unwrap())))
+pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshl_s64(a, vdup_n_s64((-N).try_into().unwrap()))
 }
 
-/// Shift right narrow
+/// Signed rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
 #[rustc_legacy_const_generics(1)]
-pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_cast(simd_shr(a, vdupq_n_u64(N.try_into().unwrap())))
+pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshlq_s64(a, vdupq_n_s64((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshr_n_s8::<N>(b))
+    vrshl_u8(a, vdup_n_s8((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshrq_n_s8::<N>(b))
+    vrshlq_u8(a, vdupq_n_s8((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshr_n_s16::<N>(b))
+    vrshl_u16(a, vdup_n_s16((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshrq_n_s16::<N>(b))
+    vrshlq_u16(a, vdupq_n_s16((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshr_n_s32::<N>(b))
+    vrshl_u32(a, vdup_n_s32((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshrq_n_s32::<N>(b))
+    vrshlq_u32(a, vdupq_n_s32((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshr_n_s64::<N>(b))
+    vrshl_u64(a, vdup_n_s64((-N).try_into().unwrap()))
 }
 
-/// Signed shift right and accumulate
+/// Unsigned rounding shift right
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshrq_n_s64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshlq_u64(a, vdupq_n_s64((-N).try_into().unwrap()))
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshr_n_u8::<N>(b))
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")]
+        fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     static_assert!(N : i32 where N >= 1 && N <= 8);
-    simd_add(a, vshrq_n_u8::<N>(b))
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")]
+        fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+vrshrn_n_s16_(a, N)
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshr_n_u16::<N>(b))
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")]
+        fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     static_assert!(N : i32 where N >= 1 && N <= 16);
-    simd_add(a, vshrq_n_u16::<N>(b))
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")]
+        fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+vrshrn_n_s32_(a, N)
 }
 
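+// Editor's note (illustrative, not generator output): the narrowing rounding
+// shifts need two definitions because the LLVM intrinsics differ by target --
+// the ARM one takes the (negated) shift as a vector, the AArch64 one as an
+// immediate i32 -- while the public signature stays identical, e.g.:
+//     let wide = vdupq_n_s16(300);
+//     let narrow: int8x8_t = vrshrn_n_s16::<2>(wide); // (300 + 2) >> 2 == 75 per lane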
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshr_n_u32::<N>(b))
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")]
+        fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
+#[cfg(target_arch = "aarch64")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     static_assert!(N : i32 where N >= 1 && N <= 32);
-    simd_add(a, vshrq_n_u32::<N>(b))
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")]
+        fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+vrshrn_n_s64_(a, N)
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshr_n_u64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    transmute(vrshrn_n_s16::<N>(transmute(a)))
 }
 
-/// Unsigned shift right and accumulate
+/// Rounding shift right narrow
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
-#[rustc_legacy_const_generics(2)]
-pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    static_assert!(N : i32 where N >= 1 && N <= 64);
-    simd_add(a, vshrq_n_u64::<N>(b))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    transmute(vrshrn_n_s32::<N>(transmute(a)))
 }
 
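+// Editor's note (illustrative, not generator output): the unsigned narrowing
+// variants reuse the signed implementation via `transmute`; the rounding add and
+// the shift are computed modulo the wide lane width, and the narrow keeps only
+// the low half bits, so the sign interpretation of the wide lanes cannot change
+// the result.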
target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + transmute(vrshrn_n_s32::(transmute(a))) } -/// Unsigned Absolute difference and Accumulate Long +/// Rounding shift right narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] -pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { - let d: uint8x8_t = vabd_u8(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + transmute(vrshrn_n_s64::(transmute(a))) } -/// Unsigned Absolute difference and Accumulate Long +/// Signed rounding shift right and accumulate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] -pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { - let d: uint16x4_t = vabd_u16(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshr_n_s8::(b)) } -/// Unsigned Absolute difference and Accumulate Long +/// Signed rounding shift right and accumulate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] -pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { - let d: uint32x2_t = vabd_u32(b, c); - simd_add(a, simd_cast(d)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshrq_n_s8::(b)) } -/// Signed Absolute difference and Accumulate Long +/// Signed rounding shift right and accumulate #[inline] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] -pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { - let d: int8x8_t = vabd_s8(b, c); - let e: uint8x8_t = simd_cast(d); - simd_add(a, simd_cast(e)) +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn 
-/// Signed Absolute difference and Accumulate Long
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
-pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    let d: int16x4_t = vabd_s16(b, c);
-    let e: uint16x4_t = simd_cast(d);
-    simd_add(a, simd_cast(e))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vrshrq_n_s16::<N>(b))
 }
 
-/// Signed Absolute difference and Accumulate Long
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
-pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    let d: int32x2_t = vabd_s32(b, c);
-    let e: uint32x2_t = simd_cast(d);
-    simd_add(a, simd_cast(e))
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshr_n_s32::<N>(b))
 }
 
-/// Singned saturating Absolute value
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")]
-        fn vqabs_s8_(a: int8x8_t) -> int8x8_t;
-    }
-vqabs_s8_(a)
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshrq_n_s32::<N>(b))
 }
 
-/// Singned saturating Absolute value
+/// Signed rounding shift right and accumulate
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")]
-        fn vqabsq_s8_(a: int8x16_t) -> int8x16_t;
"arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshr_n_s64::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshrq_n_s64::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshr_n_u8::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshrq_n_u8::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshr_n_u16::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshrq_n_u16::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshr_n_u32::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vrshrq_n_u32::<N>(b))
+}
+
+/// Unsigned rounding shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vrshr_n_u64::<N>(b))
+}
+
+/// Unsigned rounding shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vrshrq_n_u64::<N>(b))
+}
+
+/// Signed Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i8")]
+        fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-vqabsq_s8_(a)
+vshl_s8_(a, b)
 }
 
-/// Singned saturating Absolute value
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i16")]
-        fn vqabs_s16_(a: int16x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v16i8")]
+        fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-vqabs_s16_(a)
+vshlq_s8_(a, b)
 }
 
-/// Singned saturating Absolute value
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i16")]
-        fn vqabsq_s16_(a: int16x8_t) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")]
+        fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-vqabsq_s16_(a)
+vshl_s16_(a, b)
 }
 
-/// Singned saturating Absolute value
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i32")]
-        fn vqabs_s32_(a: int32x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")]
+        fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-vqabs_s32_(a)
+vshlq_s16_(a, b)
 }
 
-/// Singned saturating Absolute value
+/// Signed Shift left
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
-#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
-pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     #[allow(improper_ctypes)]
     extern "C" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i32")]
-        fn vqabsq_s32_(a: int32x4_t) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")]
+        fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-vqabsq_s32_(a)
+vshl_s32_(a, b)
 }
 
-#[cfg(test)]
-#[allow(overflowing_literals)]
-mod test {
-    use super::*;
-    use crate::core_arch::simd::*;
-    use std::mem::transmute;
-    use stdarch_test::simd_test;
+/// Signed Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")]
+        fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vshlq_s32_(a, b)
+}
+
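+// Editor's note (illustrative, not generator output): unlike the `_n_` forms,
+// these take the shift as a runtime vector; each lane of `b` may differ, and a
+// negative lane shifts that lane right instead of left, e.g.:
+//     let a = vdup_n_s32(16);
+//     let b = int32x2_t(1, -1);
+//     let r = vshl_s32(a, b); // lanes: (32, 8)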
+/// Signed Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")]
+        fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vshl_s64_(a, b)
+}
+
+/// Signed Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))]
+pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")]
+        fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vshlq_s64_(a, b)
+}
+
+/// Unsigned Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")]
+        fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+vshl_u8_(a, b)
+}
+
+/// Unsigned Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")]
+        fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+vshlq_u8_(a, b)
+}
+
+/// Unsigned Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")]
+        fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+vshl_u16_(a, b)
+}
+
+/// Unsigned Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))]
+pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")]
+        fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+vshlq_u16_(a, b)
+}
+
"llvm.aarch64.neon.ushl.v8i16")] + fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vshlq_u16_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")] + fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vshl_u32_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")] + fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vshlq_u32_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")] + fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vshl_u64_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")] + fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vshlq_u64_(a, b) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + simd_shl(a, vdup_n_s8(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + simd_shl(a, vdupq_n_s8(N.try_into().unwrap())) +} + +/// Shift left +#[inline] 
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdup_n_s16(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdupq_n_s16(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdup_n_s32(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdupq_n_s32(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    simd_shl(a, vdup_n_u8(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_imm3!(N);
+    simd_shl(a, vdupq_n_u8(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdup_n_u16(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_imm4!(N);
+    simd_shl(a, vdupq_n_u16(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdup_n_u32(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_imm5!(N);
+    simd_shl(a, vdupq_n_u32(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdup_n_s64(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdupq_n_s64(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdup_n_u64(N.try_into().unwrap()))
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_imm6!(N);
+    simd_shl(a, vdupq_n_u64(N.try_into().unwrap()))
+}
+
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 0 && N <= 8);
+    simd_shl(simd_cast(a), vdupq_n_s16(N.try_into().unwrap()))
+}
+
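+// Editor's note (illustrative, not generator output): the widening shifts first
+// `simd_cast` each lane to twice the width and then shift, so N may be as large
+// as the *input* lane width, e.g. vshll_n_s8::<8>(x) maps a lane of 1i8 to 256i16.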
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 0 && N <= 16);
+    simd_shl(simd_cast(a), vdupq_n_s32(N.try_into().unwrap()))
+}
+
+/// Signed shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 0 && N <= 32);
+    simd_shl(simd_cast(a), vdupq_n_s64(N.try_into().unwrap()))
+}
+
+/// Unsigned shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 0 && N <= 8);
+    simd_shl(simd_cast(a), vdupq_n_u16(N.try_into().unwrap()))
+}
+
+/// Unsigned shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 0 && N <= 16);
+    simd_shl(simd_cast(a), vdupq_n_u32(N.try_into().unwrap()))
+}
+
+/// Unsigned shift left long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 0 && N <= 32);
+    simd_shl(simd_cast(a), vdupq_n_u64(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdup_n_s16(N.try_into().unwrap()))
+}
+
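+// Editor's note (illustrative, not generator output): the plain `_n` right
+// shifts truncate (arithmetic for signed lanes, zero-filling for unsigned);
+// compare vshr_n_u8::<2> of 7 == 1 with the rounding vrshr_n_u8::<2> of 7 == 2.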
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdup_n_s8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdupq_n_s8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdup_n_s16(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shr(a, vdup_n_s32(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shr(a, vdupq_n_s32(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_shr(a, vdup_n_s64(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_shr(a, vdupq_n_s64(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdup_n_u8(N.try_into().unwrap()))
+}
+
+/// Shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shr(a, vdupq_n_u8(N.try_into().unwrap()))
+}
+
"arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shr(a, vdup_n_u16(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shr(a, vdupq_n_u16(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdup_n_u32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdupq_n_u32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdup_n_u64(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdupq_n_u64(N.try_into().unwrap())) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub 
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_cast(simd_shr(a, vdupq_n_s32(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_cast(simd_shr(a, vdupq_n_s64(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_cast(simd_shr(a, vdupq_n_u16(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_cast(simd_shr(a, vdupq_n_u32(N.try_into().unwrap())))
+}
+
+/// Shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_cast(simd_shr(a, vdupq_n_u64(N.try_into().unwrap())))
+}
+
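[Editor's note: illustrative aside, not part of the patch; names assumed visible via core::arch::aarch64.]

// Illustrative only: shrn shifts right and then keeps the low half of each
// lane, halving the element width (u16x8 -> u8x8 here).
#[cfg(target_arch = "aarch64")]
unsafe fn shrn_n_example() {
    use core::arch::aarch64::*;
    let a = vdupq_n_u16(0x1234);
    let r = vshrn_n_u16::<8>(a);            // (0x1234 >> 8) as u8 = 0x12
    assert_eq!(vget_lane_u8::<0>(r), 0x12);
}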
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshr_n_s8::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshrq_n_s8::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshr_n_s16::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshrq_n_s16::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshr_n_s32::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshrq_n_s32::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshr_n_s64::<N>(b))
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshrq_n_s64::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshr_n_u8::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_add(a, vshrq_n_u8::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshr_n_u16::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_add(a, vshrq_n_u16::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshr_n_u32::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_add(a, vshrq_n_u32::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshr_n_u64::<N>(b))
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    simd_add(a, vshrq_n_u64::<N>(b))
+}
+
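[Editor's note: illustrative aside, not part of the patch; names assumed visible via core::arch::aarch64.]

// Illustrative only: usra folds a right shift into an accumulate,
// computing a + (b >> N) per lane in a single instruction.
#[cfg(target_arch = "aarch64")]
unsafe fn sra_n_example() {
    use core::arch::aarch64::*;
    let acc = vdup_n_u32(100);
    let b = vdup_n_u32(64);
    let r = vsra_n_u32::<4>(acc, b);        // 100 + (64 >> 4) = 104
    assert_eq!(vget_lane_u32::<0>(r), 104);
}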
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] +pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + let d: uint16x4_t = vabd_u16(b, c); + simd_add(a, simd_cast(d)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] +pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + let d: uint32x2_t = vabd_u32(b, c); + simd_add(a, simd_cast(d)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] +pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + let d: int8x8_t = vabd_s8(b, c); + let e: uint8x8_t = simd_cast(d); + simd_add(a, simd_cast(e)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] +pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + let d: int16x4_t = vabd_s16(b, c); + let e: uint16x4_t = simd_cast(d); + simd_add(a, simd_cast(e)) +} + +/// Signed Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))] +pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + let d: int32x2_t = vabd_s32(b, c); + let e: uint32x2_t = simd_cast(d); + simd_add(a, simd_cast(e)) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")] + fn vqabs_s8_(a: int8x8_t) -> int8x8_t; + } +vqabs_s8_(a) +} + +/// Singned saturating Absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))] +pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")] + fn vqabsq_s8_(a: int8x16_t) -> int8x16_t; + } +vqabsq_s8_(a) +} + +/// Singned saturating Absolute value +#[inline] 
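[Editor's note: illustrative aside, not part of the patch; names assumed visible via core::arch::aarch64.]

// Illustrative only: uabal accumulates the *widened* absolute difference,
// i.e. a + |b - c| with u8 lanes widened to u16, so the sum cannot wrap at 8 bits.
#[cfg(target_arch = "aarch64")]
unsafe fn abal_example() {
    use core::arch::aarch64::*;
    let acc = vdupq_n_u16(10);
    let b = vdup_n_u8(3);
    let c = vdup_n_u8(250);
    let r = vabal_u8(acc, b, c);            // 10 + |3 - 250| = 257 per lane
    assert_eq!(vgetq_lane_u16::<0>(r), 257);
}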
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")]
+        fn vqabs_s8_(a: int8x8_t) -> int8x8_t;
+    }
+    vqabs_s8_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")]
+        fn vqabsq_s8_(a: int8x16_t) -> int8x16_t;
+    }
+    vqabsq_s8_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i16")]
+        fn vqabs_s16_(a: int16x4_t) -> int16x4_t;
+    }
+    vqabs_s16_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i16")]
+        fn vqabsq_s16_(a: int16x8_t) -> int16x8_t;
+    }
+    vqabsq_s16_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i32")]
+        fn vqabs_s32_(a: int32x2_t) -> int32x2_t;
+    }
+    vqabs_s32_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i32")]
+        fn vqabsq_s32_(a: int32x4_t) -> int32x4_t;
+    }
+    vqabsq_s32_(a)
+}
+
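[Editor's note: illustrative aside, not part of the patch; names assumed visible via core::arch::aarch64.]

// Illustrative only: sqabs saturates instead of wrapping, so the one input
// whose absolute value does not fit (i8::MIN) clamps to i8::MAX.
#[cfg(target_arch = "aarch64")]
unsafe fn qabs_example() {
    use core::arch::aarch64::*;
    let a = vdup_n_s8(i8::MIN);             // |-128| does not fit in i8
    let r = vqabs_s8(a);
    assert_eq!(vget_lane_s8::<0>(r), i8::MAX); // saturates to 127
}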
+#[cfg(test)]
+#[allow(overflowing_literals)]
+mod test {
+    use super::*;
+    use crate::core_arch::simd::*;
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vand_s8() {
+        let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
+        let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vandq_s8() {
+        let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00);
+        let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
+        let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00);
+        let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00);
+        let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vand_s16() {
+        let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03);
+        let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F);
+        let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03);
+        let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03);
+        let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00);
+        let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00);
+        let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vandq_s16() {
+        let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
+        let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vand_s32() {
+        let a: i32x2 = i32x2::new(0x00, 0x01);
+        let b: i32x2 = i32x2::new(0x0F, 0x0F);
+        let e: i32x2 = i32x2::new(0x00, 0x01);
+        let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i32x2 = i32x2::new(0x00, 0x01);
+        let b: i32x2 = i32x2::new(0x00, 0x00);
+        let e: i32x2 = i32x2::new(0x00, 0x00);
+        let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vandq_s32() {
+        let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03);
+        let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F);
+        let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03);
+        let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03);
+        let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00);
+        let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00);
+        let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vand_u8() {
+        let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
+        let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+
+        let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+        let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x0F, 0x0F); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x00); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + 
let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x0F); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x0F, 0x0F); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x00); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x0F); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x0F, 0x0F); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x00); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = 
i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vorr_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vorr_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } #[simd_test(enable = "neon")] - unsafe fn test_vand_s8() { - let a: i8x8 = i8x8::new(0x00, 
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + unsafe fn test_vorrq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + unsafe fn test_veorq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - + unsafe fn test_veor_s16() { let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - + unsafe fn test_veorq_s16() { let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); let b: i16x8 = i16x8::new(0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x0F, 0x0F); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - + unsafe fn test_veor_s32() { let a: i32x2 = i32x2::new(0x00, 0x01); let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x00); - let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - + unsafe fn test_veorq_s32() { let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u8() { + unsafe fn test_veor_u8() { let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + unsafe fn test_veor_u16() { + let a: u16x4 = 
u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + unsafe fn test_veor_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(veorq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + unsafe fn test_veor_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x0F, 0x0F); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + unsafe fn test_veor_u64() { + let a: 
u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x00); - let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 0x0F); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + unsafe fn test_vabd_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: i8x8 = i8x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: i8x8 = transmute(vabd_s8(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let e: i8x16 = i8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); + let r: i8x16 = transmute(vabdq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x0F); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + unsafe fn test_vabd_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(16, 15, 14, 13); + let e: i16x4 = i16x4::new(15, 13, 11, 9); + let r: i16x4 = transmute(vabd_s16(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: i16x8 = i16x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: i16x8 = transmute(vabdq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(16, 15); + let e: i32x2 = i32x2::new(15, 13); + let r: i32x2 = transmute(vabd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(16, 15, 14, 13); + let e: i32x4 = i32x4::new(15, 13, 11, 9); + let r: i32x4 = transmute(vabdq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: u8x8 = 
u8x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: u8x8 = transmute(vabd_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x0F, 0x0F); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + unsafe fn test_vabdq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let e: u8x16 = u8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); + let r: u8x16 = transmute(vabdq_u8(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x00); - let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(16, 15, 14, 13); + let e: u16x4 = u16x4::new(15, 13, 11, 9); + let r: u16x4 = transmute(vabd_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vand_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x0F); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + unsafe fn test_vabdq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: u16x8 = u16x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: u16x8 = transmute(vabdq_u16(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(16, 15); + let e: u32x2 = u32x2::new(15, 13); + let r: u32x2 = transmute(vabd_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vandq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x0F, 0x0F); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + unsafe fn test_vabdq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(16, 15, 14, 13); + let e: u32x4 = u32x4::new(15, 13, 11, 9); + let r: u32x4 = transmute(vabdq_u32(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x00); - let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vabd_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(9.0, 3.0); + let e: f32x2 = f32x2::new(8.0, 1.0); + let r: f32x2 = transmute(vabd_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); + unsafe fn test_vabdq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 5.0, -4.0); + let b: 
f32x4 = f32x4::new(9.0, 3.0, 2.0, 8.0); + let e: f32x4 = f32x4::new(8.0, 1.0, 3.0, 12.0); + let r: f32x4 = transmute(vabdq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); + unsafe fn test_vabdl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 4, 3, 2, 1); + let b: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: u16x8 = u16x8::new(9, 8, 7, 6, 6, 7, 8, 9); + let r: u16x8 = transmute(vabdl_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); + unsafe fn test_vabdl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(10, 10, 10, 10); + let e: u32x4 = u32x4::new(9, 8, 7, 6); + let r: u32x4 = transmute(vabdl_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); + unsafe fn test_vabdl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(10, 10); + let e: u64x2 = u64x2::new(9, 8); + let r: u64x2 = transmute(vabdl_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); + unsafe fn test_vabdl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 4, 3, 2, 1); + let b: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: i16x8 = i16x8::new(9, 8, 7, 6, 6, 7, 8, 9); + let r: i16x8 = transmute(vabdl_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); + unsafe fn test_vabdl_s16() { + let a: i16x4 = i16x4::new(1, 2, 11, 12); + let b: i16x4 = i16x4::new(10, 10, 10, 10); + let e: i32x4 = i32x4::new(9, 8, 1, 2); + let r: i32x4 = transmute(vabdl_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); + unsafe fn test_vabdl_s32() { + let a: i32x2 = i32x2::new(1, 11); + let 
b: i32x2 = i32x2::new(10, 10); + let e: i64x2 = i64x2::new(9, 1); + let r: i64x2 = transmute(vabdl_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); + unsafe fn test_vceq_u8() { + let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); + unsafe fn test_vceqq_u8() { + let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); + let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); + unsafe fn test_vceq_u16() { + let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = 
transmute(vorr_u32(transmute(a), transmute(b))); + unsafe fn test_vceqq_u16() { + let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); + unsafe fn test_vceq_u32() { + let a: u32x2 = u32x2::new(0, 0x01); + let b: u32x2 = u32x2::new(0, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0, 0); + let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(vorr_s64(transmute(a), transmute(b))); + unsafe fn test_vceqq_u32() { + let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorrq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); + unsafe fn test_vceq_s8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vorr_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); + unsafe fn test_vceqq_s8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 
0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_vorrq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s8() { - let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); + unsafe fn test_vceq_s16() { + let a: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s8() { - let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); + let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s16() { - let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); - let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); - let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); + unsafe fn test_vceqq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s16() { - let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); + let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 
= i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s32() { - let a: i32x2 = i32x2::new(0x00, 0x01); - let b: i32x2 = i32x2::new(0x00, 0x00); - let e: i32x2 = i32x2::new(0x00, 0x01); - let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); + unsafe fn test_vceq_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x01); + let b: i32x2 = i32x2::new(-2147483648, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_veorq_s32() { - let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); - let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); - let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); + let a: i32x2 = i32x2::new(-2147483648, -2147483648); + let b: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u8() { - let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); + unsafe fn test_vceqq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u8() { - let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); - let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); + let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u16() { - let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); - let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); - let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); + unsafe fn test_vceq_p8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u16() { - let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); - let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u32() { - let a: u32x2 = u32x2::new(0x00, 0x01); - let b: u32x2 = u32x2::new(0x00, 0x00); - let e: u32x2 = u32x2::new(0x00, 0x01); - let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); + unsafe fn test_vceqq_p8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); assert_eq!(r, e); - } - #[simd_test(enable = "neon")] - unsafe fn test_veorq_u32() { - let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); - let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); - let r: u32x4 = transmute(veorq_u32(transmute(a), transmute(b))); + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_s64() { - let a: i64x1 = i64x1::new(0x00); - let b: i64x1 = i64x1::new(0x00); - let e: i64x1 = i64x1::new(0x00); - let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); + unsafe fn test_vceq_f32() { + let a: f32x2 = f32x2::new(1.2, 3.4); + let b: f32x2 = f32x2::new(1.2, 3.4); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_s64() { - let a: i64x2 = i64x2::new(0x00, 0x01); - let b: i64x2 = i64x2::new(0x00, 0x00); - let e: i64x2 = i64x2::new(0x00, 0x01); - let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); + unsafe fn test_vceqq_f32() { + let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_veor_u64() { - let a: u64x1 = u64x1::new(0x00); - let b: u64x1 = u64x1::new(0x00); - let e: u64x1 = u64x1::new(0x00); - let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); + unsafe fn test_vtst_s8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_s8(transmute(a), transmute(b))); assert_eq!(r, 
e); } #[simd_test(enable = "neon")] - unsafe fn test_veorq_u64() { - let a: u64x2 = u64x2::new(0x00, 0x01); - let b: u64x2 = u64x2::new(0x00, 0x00); - let e: u64x2 = u64x2::new(0x00, 0x01); - let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); + unsafe fn test_vtstq_s8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i8x8 = i8x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: i8x8 = transmute(vabd_s8(transmute(a), transmute(b))); + unsafe fn test_vtst_s16() { + let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s8() { - let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: i8x16 = i8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); - let r: i8x16 = transmute(vabdq_s8(transmute(a), transmute(b))); + unsafe fn test_vtstq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vtstq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_s16() { - let a: i16x4 = i16x4::new(1, 2, 3, 4); - let b: i16x4 = i16x4::new(16, 15, 14, 13); - let e: i16x4 = i16x4::new(15, 13, 11, 9); - let r: i16x4 = transmute(vabd_s16(transmute(a), transmute(b))); + unsafe fn test_vtst_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x00); + let b: i32x2 = i32x2::new(-2147483648, 0x00); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vtst_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s16() { - let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: i16x8 = i16x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: i16x8 = transmute(vabdq_s16(transmute(a), transmute(b))); + unsafe fn test_vtstq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let b: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vtstq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_s32() { - let a: i32x2 = i32x2::new(1, 2); - let b: i32x2 = i32x2::new(16, 15); - let e: i32x2 = i32x2::new(15, 13); - let r: i32x2 = transmute(vabd_s32(transmute(a), transmute(b))); + unsafe fn test_vtst_p8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = 
i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_s32() { - let a: i32x4 = i32x4::new(1, 2, 3, 4); - let b: i32x4 = i32x4::new(16, 15, 14, 13); - let e: i32x4 = i32x4::new(15, 13, 11, 9); - let r: i32x4 = transmute(vabdq_s32(transmute(a), transmute(b))); + unsafe fn test_vtstq_p8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u8x8 = u8x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: u8x8 = transmute(vabd_u8(transmute(a), transmute(b))); + unsafe fn test_vtst_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u8() { - let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); - let e: u8x16 = u8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); - let r: u8x16 = transmute(vabdq_u8(transmute(a), transmute(b))); + unsafe fn test_vtstq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let b: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(16, 15, 14, 13); - let e: u16x4 = u16x4::new(15, 13, 11, 9); - let r: u16x4 = transmute(vabd_u16(transmute(a), transmute(b))); + unsafe fn test_vtst_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let b: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u16() { - let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); - let e: u16x8 = u16x8::new(15, 13, 11, 9, 7, 5, 3, 1); - let r: u16x8 = transmute(vabdq_u16(transmute(a), transmute(b))); + unsafe fn test_vtstq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = 
transmute(vtstq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(16, 15); - let e: u32x2 = u32x2::new(15, 13); - let r: u32x2 = transmute(vabd_u32(transmute(a), transmute(b))); + unsafe fn test_vtst_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let b: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vtst_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_u32() { - let a: u32x4 = u32x4::new(1, 2, 3, 4); - let b: u32x4 = u32x4::new(16, 15, 14, 13); - let e: u32x4 = u32x4::new(15, 13, 11, 9); - let r: u32x4 = transmute(vabdq_u32(transmute(a), transmute(b))); + unsafe fn test_vtstq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let b: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vtstq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabd_f32() { - let a: f32x2 = f32x2::new(1.0, 2.0); - let b: f32x2 = f32x2::new(9.0, 3.0); - let e: f32x2 = f32x2::new(8.0, 1.0); - let r: f32x2 = transmute(vabd_f32(transmute(a), transmute(b))); + unsafe fn test_vabs_f32() { + let a: f32x2 = f32x2::new(-0.1, -2.2); + let e: f32x2 = f32x2::new(0.1, 2.2); + let r: f32x2 = transmute(vabs_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdq_f32() { - let a: f32x4 = f32x4::new(1.0, 2.0, 5.0, -4.0); - let b: f32x4 = f32x4::new(9.0, 3.0, 2.0, 8.0); - let e: f32x4 = f32x4::new(8.0, 1.0, 3.0, 12.0); - let r: f32x4 = transmute(vabdq_f32(transmute(a), transmute(b))); + unsafe fn test_vabsq_f32() { + let a: f32x4 = f32x4::new(-0.1, -2.2, -3.3, -6.6); + let e: f32x4 = f32x4::new(0.1, 2.2, 3.3, 6.6); + let r: f32x4 = transmute(vabsq_f32(transmute(a))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u8() { - let a: u8x8 = u8x8::new(1, 2, 3, 4, 4, 3, 2, 1); - let b: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: u16x8 = u16x8::new(9, 8, 7, 6, 6, 7, 8, 9); - let r: u16x8 = transmute(vabdl_u8(transmute(a), transmute(b))); + unsafe fn test_vcgt_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u16() { - let a: u16x4 = u16x4::new(1, 2, 3, 4); - let b: u16x4 = u16x4::new(10, 10, 10, 10); - let e: u32x4 = u32x4::new(9, 8, 7, 6); - let r: u32x4 = transmute(vabdl_u16(transmute(a), transmute(b))); + unsafe fn test_vcgtq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_u32() { - let a: u32x2 = u32x2::new(1, 2); - let b: u32x2 = u32x2::new(10, 10); - let e: u64x2 = u64x2::new(9, 8); - let r: u64x2 = transmute(vabdl_u32(transmute(a), transmute(b))); + unsafe fn test_vcgt_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = 
i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s8() { - let a: i8x8 = i8x8::new(1, 2, 3, 4, 4, 3, 2, 1); - let b: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); - let e: i16x8 = i16x8::new(9, 8, 7, 6, 6, 7, 8, 9); - let r: i16x8 = transmute(vabdl_s8(transmute(a), transmute(b))); + unsafe fn test_vcgtq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s16() { - let a: i16x4 = i16x4::new(1, 2, 11, 12); - let b: i16x4 = i16x4::new(10, 10, 10, 10); - let e: i32x4 = i32x4::new(9, 8, 1, 2); - let r: i32x4 = transmute(vabdl_s16(transmute(a), transmute(b))); + unsafe fn test_vcgt_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabdl_s32() { - let a: i32x2 = i32x2::new(1, 11); - let b: i32x2 = i32x2::new(10, 10); - let e: i64x2 = i64x2::new(9, 1); - let r: i64x2 = transmute(vabdl_s32(transmute(a), transmute(b))); + unsafe fn test_vcgtq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u8() { - let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + unsafe fn test_vcgt_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u8() { - let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); - let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + unsafe fn test_vcgtq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); - let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 
0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcgtq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u16() { - let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); - let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + unsafe fn test_vcgt_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); - let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u16() { - let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + unsafe fn test_vcgtq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_u32() { - let a: u32x2 = u32x2::new(0, 0x01); - let b: u32x2 = u32x2::new(0, 0x01); + unsafe fn test_vcgt_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: u32x2 = u32x2::new(0, 0); - let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_u32() { - let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); - let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + unsafe fn test_vcgtq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); - let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 
0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s8() { - let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + unsafe fn test_vcgtq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s8() { - let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + unsafe fn test_vcltq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s16() { - let a: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); - let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + unsafe fn test_vclt_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); - let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); + let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + 
unsafe fn test_vcltq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i16x8 = i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); - let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); + let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x01); - let b: i32x2 = i32x2::new(-2147483648, 0x01); + unsafe fn test_vclt_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x2 = i32x2::new(-2147483648, -2147483648); - let b: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); - let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + unsafe fn test_vcltq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - - let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); - let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); - let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_p8() { - let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + unsafe fn test_vclt_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); - let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = 
"neon")] - unsafe fn test_vceqq_p8() { - let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + unsafe fn test_vclt_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b))); assert_eq!(r, e); + } - let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); - let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceq_f32() { - let a: f32x2 = f32x2::new(1.2, 3.4); - let b: f32x2 = f32x2::new(1.2, 3.4); + unsafe fn test_vclt_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vceqq_f32() { - let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); - let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + unsafe fn test_vcltq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_s8(transmute(a), transmute(b))); + unsafe fn test_vclt_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = 
transmute(vtstq_s8(transmute(a), transmute(b))); + unsafe fn test_vcltq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s16() { - let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_s16(transmute(a), transmute(b))); + unsafe fn test_vcle_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_s16(transmute(a), transmute(b))); + unsafe fn test_vcleq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x00); - let b: i32x2 = i32x2::new(-2147483648, 0x00); - let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); - let r: u32x2 = transmute(vtst_s32(transmute(a), transmute(b))); + unsafe fn test_vcle_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let b: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vtstq_s32(transmute(a), transmute(b))); + unsafe fn test_vcleq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_p8() { - let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); + unsafe fn test_vcle_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn 
test_vtstq_p8() { - let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); - let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); + unsafe fn test_vcleq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_u8() { - let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vtst_u8(transmute(a), transmute(b))); + unsafe fn test_vcle_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u8() { - let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let b: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); - let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vtstq_u8(transmute(a), transmute(b))); + unsafe fn test_vcleq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtst_u16() { - let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let b: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); - let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vtst_u16(transmute(a), transmute(b))); + unsafe fn test_vcle_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u16() { - let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let b: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); - let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vtstq_u16(transmute(a), transmute(b))); + unsafe fn test_vcleq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn 
test_vtst_u32() { - let a: u32x2 = u32x2::new(0, 0x00); - let b: u32x2 = u32x2::new(0, 0x00); - let e: u32x2 = u32x2::new(0, 0); - let r: u32x2 = transmute(vtst_u32(transmute(a), transmute(b))); + unsafe fn test_vcle_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vtstq_u32() { - let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let b: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); - let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vtstq_u32(transmute(a), transmute(b))); + unsafe fn test_vcleq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabs_f32() { - let a: f32x2 = f32x2::new(-0.1, -2.2); - let e: f32x2 = f32x2::new(0.1, 2.2); - let r: f32x2 = transmute(vabs_f32(transmute(a))); + unsafe fn test_vcle_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vabsq_f32() { - let a: f32x4 = f32x4::new(-0.1, -2.2, -3.3, -6.6); - let e: f32x4 = f32x4::new(0.1, 2.2, 3.3, 6.6); - let r: f32x4 = transmute(vabsq_f32(transmute(a))); + unsafe fn test_vcleq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s8() { + unsafe fn test_vcge_s8() { let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); + let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s8() { + unsafe fn test_vcgeq_s8() { let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s16() { + unsafe fn test_vcge_s16() { let a: i16x4 = i16x4::new(1, 2, 3, 4); let b: i16x4 = i16x4::new(0, 1, 2, 3); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); + let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s16() { + unsafe fn test_vcgeq_s16() { let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 
0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); + let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_s32() { + unsafe fn test_vcge_s32() { let a: i32x2 = i32x2::new(1, 2); let b: i32x2 = i32x2::new(0, 1); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_s32() { + unsafe fn test_vcgeq_s32() { let a: i32x4 = i32x4::new(1, 2, 3, 4); let b: i32x4 = i32x4::new(0, 1, 2, 3); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u8() { + unsafe fn test_vcge_u8() { let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); + let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u8() { + unsafe fn test_vcgeq_u8() { let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - let r: u8x16 = transmute(vcgtq_u8(transmute(a), transmute(b))); + let r: u8x16 = transmute(vcgeq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u16() { + unsafe fn test_vcge_u16() { let a: u16x4 = u16x4::new(1, 2, 3, 4); let b: u16x4 = u16x4::new(0, 1, 2, 3); let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); + let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u16() { + unsafe fn test_vcgeq_u16() { let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); - let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); + let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgt_u32() { + unsafe fn test_vcge_u32() { let a: u32x2 = u32x2::new(1, 2); let b: u32x2 = u32x2::new(0, 1); let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); + let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vcgtq_u32() { + unsafe fn test_vcgeq_u32() { let a: u32x4 = u32x4::new(1, 2, 3, 4); let b: u32x4 = u32x4::new(0, 1, 2, 3); let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); - let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); + let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - 
-    unsafe fn test_vcgt_f32() {
+    unsafe fn test_vcge_f32() {
         let a: f32x2 = f32x2::new(1.2, 2.3);
         let b: f32x2 = f32x2::new(0.1, 1.2);
         let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b)));
+        let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_f32() {
+    unsafe fn test_vcgeq_f32() {
         let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5);
         let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4);
         let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b)));
+        let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s8() {
-        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b)));
+    unsafe fn test_vcls_s8() {
+        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7);
+        let r: i8x8 = transmute(vcls_s8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s8() {
-        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b)));
+    unsafe fn test_vclsq_s8() {
+        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F);
+        let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0);
+        let r: i8x16 = transmute(vclsq_s8(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s16() {
-        let a: i16x4 = i16x4::new(0, 1, 2, 3);
-        let b: i16x4 = i16x4::new(1, 2, 3, 4);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b)));
+    unsafe fn test_vcls_s16() {
+        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00);
+        let e: i16x4 = i16x4::new(0, 15, 15, 15);
+        let r: i16x4 = transmute(vcls_s16(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s16() {
-        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b)));
+    unsafe fn test_vclsq_s16() {
+        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15);
+        let r: i16x8 = transmute(vclsq_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcls_s32() {
+        let a: i32x2 = i32x2::new(-2147483648, -1);
+        let e: i32x2 = i32x2::new(0, 31);
+        let r: i32x2 = transmute(vcls_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vclsq_s32() {
+        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00);
+        let e: i32x4 = i32x4::new(0, 31, 31, 31);
+        let r: i32x4 = transmute(vclsq_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
"neon")] + unsafe fn test_vclz_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vclz_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F); + let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1); + let r: i8x16 = transmute(vclzq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: i16x4 = i16x4::new(0, 0, 16, 15); + let r: i16x4 = transmute(vclz_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclzq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vclz_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: i32x4 = i32x4::new(0, 0, 32, 31); + let r: i32x4 = transmute(vclzq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7); + let r: u8x8 = transmute(vclz_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF); + let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: u8x16 = transmute(vclzq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01); + let e: u16x4 = u16x4::new(16, 16, 15, 15); + let r: u16x4 = transmute(vclz_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15); + let r: u16x8 = transmute(vclzq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(32, 32); + let r: u32x2 = transmute(vclz_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01); + let e: u32x4 = u32x4::new(32, 32, 31, 31); + let r: u32x4 = transmute(vclzq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagtq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 
+        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
+        let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcage_f32() {
+        let a: f32x2 = f32x2::new(-1.2, 0.0);
+        let b: f32x2 = f32x2::new(-1.1, 0.0);
+        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+        let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcageq_f32() {
+        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
+        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0);
+        let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcalt_f32() {
+        let a: f32x2 = f32x2::new(-1.2, 0.0);
+        let b: f32x2 = f32x2::new(-1.1, 0.0);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcaltq_f32() {
+        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
+        let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF);
+        let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s32() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let b: i32x2 = i32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b)));
+    unsafe fn test_vcale_f32() {
+        let a: f32x2 = f32x2::new(-1.2, 0.0);
+        let b: f32x2 = f32x2::new(-1.1, 0.0);
+        let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
+        let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 2, 3);
-        let b: i32x4 = i32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b)));
+    unsafe fn test_vcaleq_f32() {
+        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
+        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
+        let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF);
+        let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u8() {
-        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b)));
+    unsafe fn test_vcvt_s32_f32() {
+        let a: f32x2 = f32x2::new(-1.1, 2.1);
+        let e: i32x2 = i32x2::new(-1, 2);
+        let r: i32x2 = transmute(vcvt_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u8() {
-        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b)));
+    unsafe fn test_vcvtq_s32_f32() {
+        let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
+        let e: i32x4 = i32x4::new(-1, 2, -2, 3);
+        let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u16() {
-        let a: u16x4 = u16x4::new(0, 1, 2, 3);
-        let b: u16x4 = u16x4::new(1, 2, 3, 4);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b)));
+    unsafe fn test_vcvt_u32_f32() {
+        let a: f32x2 = f32x2::new(1.1, 2.1);
+        let e: u32x2 = u32x2::new(1, 2);
+        let r: u32x2 = transmute(vcvt_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u16() {
-        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b)));
+    unsafe fn test_vcvtq_u32_f32() {
+        let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9);
+        let e: u32x4 = u32x4::new(1, 2, 2, 3);
+        let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u32() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let b: u32x2 = u32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_s8() {
+        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u32() {
-        let a: u32x4 = u32x4::new(0, 1, 2, 3);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_s8() {
+        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_f32() {
-        let a: f32x2 = f32x2::new(0.1, 1.2);
-        let b: f32x2 = f32x2::new(1.2, 2.3);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_s16() {
+        let a: i16x4 = i16x4::new(1, 1, 1, 4);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_f32() {
-        let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4);
-        let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_s16() {
+        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s8() {
-        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_s32() {
+        let a: i32x2 = i32x2::new(1, 1);
+        let e: i32x2 = i32x2::new(1, 1);
+        let r: i32x2 = transmute(vdup_lane_s32::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s8() {
-        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_s32() {
+        let a: i32x4 = i32x4::new(1, 1, 1, 4);
+        let e: i32x4 = i32x4::new(1, 1, 1, 1);
+        let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s16() {
-        let a: i16x4 = i16x4::new(0, 1, 2, 3);
-        let b: i16x4 = i16x4::new(1, 2, 3, 4);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_s8() {
+        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s16() {
-        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_s16() {
+        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s32() {
-        let a: i32x2 = i32x2::new(0, 1);
-        let b: i32x2 = i32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_s32() {
+        let a: i32x4 = i32x4::new(1, 1, 1, 4);
+        let e: i32x2 = i32x2::new(1, 1);
+        let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s32() {
-        let a: i32x4 = i32x4::new(0, 1, 2, 3);
-        let b: i32x4 = i32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_s8() {
+        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u8() {
-        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_s16() {
+        let a: i16x4 = i16x4::new(1, 1, 1, 4);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u8() {
-        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_s32() {
+        let a: i32x2 = i32x2::new(1, 1);
+        let e: i32x4 = i32x4::new(1, 1, 1, 1);
+        let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u16() {
-        let a: u16x4 = u16x4::new(0, 1, 2, 3);
-        let b: u16x4 = u16x4::new(1, 2, 3, 4);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_u8() {
+        let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u16() {
-        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_u8() {
+        let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+        let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u32() {
-        let a: u32x2 = u32x2::new(0, 1);
-        let b: u32x2 = u32x2::new(1, 2);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_u16() {
+        let a: u16x4 = u16x4::new(1, 1, 1, 4);
+        let e: u16x4 = u16x4::new(1, 1, 1, 1);
+        let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u32() {
-        let a: u32x4 = u32x4::new(0, 1, 2, 3);
-        let b: u32x4 = u32x4::new(1, 2, 3, 4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_u16() {
+        let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_f32() {
-        let a: f32x2 = f32x2::new(0.1, 1.2);
-        let b: f32x2 = f32x2::new(1.2, 2.3);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_u32() {
+        let a: u32x2 = u32x2::new(1, 1);
+        let e: u32x2 = u32x2::new(1, 1);
+        let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_f32() {
-        let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4);
-        let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_u32() {
+        let a: u32x4 = u32x4::new(1, 1, 1, 4);
+        let e: u32x4 = u32x4::new(1, 1, 1, 1);
+        let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s8() {
-        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_u8() {
+        let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+        let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s8() {
-        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_u16() {
+        let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: u16x4 = u16x4::new(1, 1, 1, 1);
+        let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s16() {
-        let a: i16x4 = i16x4::new(1, 2, 3, 4);
-        let b: i16x4 = i16x4::new(0, 1, 2, 3);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_u32() {
+        let a: u32x4 = u32x4::new(1, 1, 1, 4);
+        let e: u32x2 = u32x2::new(1, 1);
+        let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s16() {
-        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_u8() {
+        let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s32() {
-        let a: i32x2 = i32x2::new(1, 2);
-        let b: i32x2 = i32x2::new(0, 1);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_u16() {
+        let a: u16x4 = u16x4::new(1, 1, 1, 4);
+        let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: u16x8 = transmute(vdupq_lane_u16::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s32() {
-        let a: i32x4 = i32x4::new(1, 2, 3, 4);
-        let b: i32x4 = i32x4::new(0, 1, 2, 3);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_u32() {
+        let a: u32x2 = u32x2::new(1, 1);
+        let e: u32x4 = u32x4::new(1, 1, 1, 1);
+        let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u8() {
-        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_p8() {
+        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u8() {
-        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
-        let r: u8x16 = transmute(vcgeq_u8(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_p8() {
+        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u16() {
-        let a: u16x4 = u16x4::new(1, 2, 3, 4);
-        let b: u16x4 = u16x4::new(0, 1, 2, 3);
-        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdup_lane_p16() {
+        let a: i16x4 = i16x4::new(1, 1, 1, 4);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u16() {
-        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
-        let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_laneq_p16() {
+        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u32() {
-        let a: u32x2 = u32x2::new(1, 2);
-        let b: u32x2 = u32x2::new(0, 1);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_p8() {
+        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u32() {
-        let a: u32x4 = u32x4::new(1, 2, 3, 4);
-        let b: u32x4 = u32x4::new(0, 1, 2, 3);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b)));
+    unsafe fn test_vdup_laneq_p16() {
+        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a)));
        assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_f32() {
-        let a: f32x2 = f32x2::new(1.2, 2.3);
-        let b: f32x2 = f32x2::new(0.1, 1.2);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_p8() {
+        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
+        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_f32() {
-        let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5);
-        let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vdupq_lane_p16() {
+        let a: i16x4 = i16x4::new(1, 1, 1, 4);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcls_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7);
-        let r: i8x8 = transmute(vcls_s8(transmute(a)));
+    unsafe fn test_vdupq_laneq_s64() {
+        let a: i64x2 = i64x2::new(1, 1);
+        let e: i64x2 = i64x2::new(1, 1);
+        let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclsq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F);
-        let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0);
-        let r: i8x16 = transmute(vclsq_s8(transmute(a)));
+    unsafe fn test_vdupq_lane_s64() {
+        let a: i64x1 = i64x1::new(1);
+        let e: i64x2 = i64x2::new(1, 1);
+        let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcls_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00);
-        let e: i16x4 = i16x4::new(0, 15, 15, 15);
-        let r: i16x4 = transmute(vcls_s16(transmute(a)));
+    unsafe fn test_vdupq_laneq_u64() {
+        let a: u64x2 = u64x2::new(1, 1);
+        let e: u64x2 = u64x2::new(1, 1);
+        let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclsq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
-        let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15);
-        let r: i16x8 = transmute(vclsq_s16(transmute(a)));
+    unsafe fn test_vdupq_lane_u64() {
+        let a: u64x1 = u64x1::new(1);
+        let e: u64x2 = u64x2::new(1, 1);
+        let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcls_s32() {
-        let a: i32x2 = i32x2::new(-2147483648, -1);
-        let e: i32x2 = i32x2::new(0, 31);
-        let r: i32x2 = transmute(vcls_s32(transmute(a)));
+    unsafe fn test_vdup_lane_f32() {
+        let a: f32x2 = f32x2::new(1., 1.);
+        let e: f32x2 = f32x2::new(1., 1.);
+        let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclsq_s32() {
-        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00);
-        let e: i32x4 = i32x4::new(0, 31, 31, 31);
-        let r: i32x4 = transmute(vclsq_s32(transmute(a)));
+    unsafe fn test_vdupq_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., 1., 1., 4.);
+        let e: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_s8() {
-        let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
-        let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7);
-        let r: i8x8 = transmute(vclz_s8(transmute(a)));
+    unsafe fn test_vdup_laneq_f32() {
+        let a: f32x4 = f32x4::new(1., 1., 1., 4.);
+        let e: f32x2 = f32x2::new(1., 1.);
+        let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_s8() {
-        let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F);
-        let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1);
-        let r: i8x16 = transmute(vclzq_s8(transmute(a)));
+    unsafe fn test_vdupq_lane_f32() {
+        let a: f32x2 = f32x2::new(1., 1.);
+        let e: f32x4 = f32x4::new(1., 1., 1., 1.);
+        let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_s16() {
-        let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01);
-        let e: i16x4 = i16x4::new(0, 0, 16, 15);
-        let r: i16x4 = transmute(vclz_s16(transmute(a)));
+    unsafe fn test_vdup_lane_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_s16() {
-        let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01);
-        let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15);
-        let r: i16x8 = transmute(vclzq_s16(transmute(a)));
+    unsafe fn test_vdup_lane_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_s32() {
-        let a: i32x2 = i32x2::new(-2147483648, -1);
-        let e: i32x2 = i32x2::new(0, 0);
-        let r: i32x2 = transmute(vclz_s32(transmute(a)));
+    unsafe fn test_vdup_laneq_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i64x1 = i64x1::new(1);
+        let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_s32() {
-        let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01);
-        let e: i32x4 = i32x4::new(0, 0, 32, 31);
-        let r: i32x4 = transmute(vclzq_s32(transmute(a)));
+    unsafe fn test_vdup_laneq_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: u64x1 = u64x1::new(1);
+        let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_u8() {
-        let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);
-        let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7);
-        let r: u8x8 = transmute(vclz_u8(transmute(a)));
+    unsafe fn test_vext_s8() {
+        let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11);
+        let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19);
+        let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15);
+        let r: i8x8 = transmute(vext_s8::<4>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_u8() {
-        let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF);
-        let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0);
-        let r: u8x16 = transmute(vclzq_u8(transmute(a)));
+    unsafe fn test_vextq_s8() {
+        let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15);
+        let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11);
+        let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19);
+        let r: i8x16 = transmute(vextq_s8::<8>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_u16() {
-        let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01);
-        let e: u16x4 = u16x4::new(16, 16, 15, 15);
-        let r: u16x4 = transmute(vclz_u16(transmute(a)));
+    unsafe fn test_vext_s16() {
+        let a: i16x4 = i16x4::new(0, 8, 8, 9);
+        let b: i16x4 = i16x4::new(9, 11, 14, 15);
+        let e: i16x4 = i16x4::new(8, 9, 9, 11);
+        let r: i16x4 = transmute(vext_s16::<2>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_u16() {
-        let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01);
-        let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15);
-        let r: u16x8 = transmute(vclzq_u16(transmute(a)));
+    unsafe fn test_vextq_s16() {
+        let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11);
+        let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19);
+        let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15);
+        let r: i16x8 = transmute(vextq_s16::<4>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclz_u32() {
-        let a: u32x2 = u32x2::new(0, 0x00);
-        let e: u32x2 = u32x2::new(32, 32);
-        let r: u32x2 = transmute(vclz_u32(transmute(a)));
+    unsafe fn test_vext_s32() {
+        let a: i32x2 = i32x2::new(0, 8);
+        let b: i32x2 = i32x2::new(9, 11);
+        let e: i32x2 = i32x2::new(8, 9);
+        let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclzq_u32() {
-        let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01);
-        let e: u32x4 = u32x4::new(32, 32, 31, 31);
-        let r: u32x4 = transmute(vclzq_u32(transmute(a)));
+    unsafe fn test_vextq_s32() {
+        let a: i32x4 = i32x4::new(0, 8, 8, 9);
+        let b: i32x4 = i32x4::new(9, 11, 14, 15);
+        let e: i32x4 = i32x4::new(8, 9, 9, 11);
+        let r: i32x4 = transmute(vextq_s32::<2>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcagt_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let b: f32x2 = f32x2::new(-1.1, 0.0);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
-        let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b)));
+    unsafe fn test_vext_u8() {
+        let a: u8x8 = u8x8::new(0, 8, 8, 9, 8, 9, 9, 11);
+        let b: u8x8 = u8x8::new(9, 11, 14, 15, 16, 17, 18, 19);
+        let e: u8x8 = u8x8::new(8, 9, 9, 11, 9, 11, 14, 15);
+        let r: u8x8 = transmute(vext_u8::<4>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcagtq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
-        let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b)));
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vextq_u8() {
+        let a: u8x16 = u8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15);
+        let b: u8x16 = u8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11);
+        let e: u8x16 = u8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19);
+        let r: u8x16 = transmute(vextq_u8::<8>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcage_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let b: f32x2 = f32x2::new(-1.1, 0.0);
-        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b)));
+    unsafe fn test_vext_u16() {
+        let a: u16x4 = u16x4::new(0, 8, 8, 9);
+        let b: u16x4 = u16x4::new(9, 11, 14, 15);
+        let e: u16x4 = u16x4::new(8, 9, 9, 11);
+        let r: u16x4 = transmute(vext_u16::<2>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcageq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
-        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0);
-        let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vextq_u16() {
+        let a: u16x8 = u16x8::new(0, 8, 8, 9, 8, 9, 9, 11);
+        let b: u16x8 = u16x8::new(9, 11, 14, 15, 16, 17, 18, 19);
+        let e: u16x8 = u16x8::new(8, 9, 9, 11, 9, 11, 14, 15);
+        let r: u16x8 = transmute(vextq_u16::<4>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcalt_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let b: f32x2 = f32x2::new(-1.1, 0.0);
-        let e: u32x2 = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b)));
+    unsafe fn test_vext_u32() {
+        let a: u32x2 = u32x2::new(0, 8);
+        let b: u32x2 = u32x2::new(9, 11);
+        let e: u32x2 = u32x2::new(8, 9);
+        let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcaltq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
-        let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vextq_u32() {
+        let a: u32x4 = u32x4::new(0, 8, 8, 9);
+        let b: u32x4 = u32x4::new(9, 11, 14, 15);
+        let e: u32x4 = u32x4::new(8, 9, 9, 11);
+        let r: u32x4 = transmute(vextq_u32::<2>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcale_f32() {
-        let a: f32x2 = f32x2::new(-1.2, 0.0);
-        let b: f32x2 = f32x2::new(-1.1, 0.0);
-        let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
-        let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b)));
+    unsafe fn test_vext_p8() {
+        let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11);
+        let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19);
+        let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15);
+        let r: i8x8 = transmute(vext_p8::<4>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcaleq_f32() {
-        let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3);
-        let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4);
-        let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF);
-        let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b)));
+    unsafe fn test_vextq_p8() {
+        let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15);
+        let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11);
+        let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19);
+        let r: i8x16 = transmute(vextq_p8::<8>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcvt_s32_f32() {
-        let a: f32x2 = f32x2::new(-1.1, 2.1);
-        let e: i32x2 = i32x2::new(-1, 2);
-        let r: i32x2 = transmute(vcvt_s32_f32(transmute(a)));
+    unsafe fn test_vext_p16() {
+        let a: i16x4 = i16x4::new(0, 8, 8, 9);
+        let b: i16x4 = i16x4::new(9, 11, 14, 15);
+        let e: i16x4 = i16x4::new(8, 9, 9, 11);
+        let r: i16x4 = transmute(vext_p16::<2>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_s32_f32() {
-        let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9);
-        let e: i32x4 = i32x4::new(-1, 2, -2, 3);
-        let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a)));
+    unsafe fn test_vextq_p16() {
+        let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11);
+        let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19);
+        let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15);
+        let r: i16x8 = transmute(vextq_p16::<4>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcvt_u32_f32() {
-        let a: f32x2 = f32x2::new(1.1, 2.1);
-        let e: u32x2 = u32x2::new(1, 2);
-        let r: u32x2 = transmute(vcvt_u32_f32(transmute(a)));
+    unsafe fn test_vextq_s64() {
+        let a: i64x2 = i64x2::new(0, 8);
+        let b: i64x2 = i64x2::new(9, 11);
+        let e: i64x2 = i64x2::new(8, 9);
+        let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_u32_f32() {
-        let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9);
-        let e: u32x4 = u32x4::new(1, 2, 2, 3);
-        let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a)));
+    unsafe fn test_vextq_u64() {
+        let a: u64x2 = u64x2::new(0, 8);
+        let b: u64x2 = u64x2::new(9, 11);
+        let e: u64x2 = u64x2::new(8, 9);
+        let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_s8() {
-        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a)));
+    unsafe fn test_vext_f32() {
+        let a: f32x2 = f32x2::new(0., 2.);
+        let b: f32x2 = f32x2::new(3., 4.);
+        let e: f32x2 = f32x2::new(2., 3.);
+        let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_s8() {
-        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
-        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a)));
+    unsafe fn test_vextq_f32() {
+        let a: f32x4 = f32x4::new(0., 2., 2., 3.);
+        let b: f32x4 = f32x4::new(3., 4., 5., 6.);
+        let e: f32x4 = f32x4::new(2., 3., 3., 4.);
+        let r: f32x4 = transmute(vextq_f32::<2>(transmute(a), transmute(b)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_s16() {
-        let a: i16x4 = i16x4::new(1, 1, 1, 4);
-        let e: i16x4 = i16x4::new(1, 1, 1, 1);
-        let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a)));
+    unsafe fn test_vmla_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_s16() {
-        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a)));
+    unsafe fn test_vmlaq_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
+        let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_s32() {
-        let a: i32x2 = i32x2::new(1, 1);
-        let e: i32x2 = i32x2::new(1, 1);
-        let r: i32x2 = transmute(vdup_lane_s32::<1>(transmute(a)));
+    unsafe fn test_vmla_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(3, 3, 3, 3);
+        let e: i16x4 = i16x4::new(6, 7, 8, 9);
+        let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_s32() {
-        let a: i32x4 = i32x4::new(1, 1, 1, 4);
-        let e: i32x4 = i32x4::new(1, 1, 1, 1);
-        let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a)));
+    unsafe fn test_vmlaq_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_s8() {
-        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
-        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a)));
+    unsafe fn test_vmla_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(3, 3);
+        let e: i32x2 = i32x2::new(6, 7);
+        let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_s16() {
-        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i16x4 = i16x4::new(1, 1, 1, 1);
-        let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a)));
+    unsafe fn test_vmlaq_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32x4 = i32x4::new(3, 3, 3, 3);
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_s32() {
-        let a: i32x4 = i32x4::new(1, 1, 1, 4);
-        let e: i32x2 = i32x2::new(1, 1);
-        let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a)));
+    unsafe fn test_vmla_u8() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_s8() {
-        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a)));
+    unsafe fn test_vmlaq_u8() {
+        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
+        let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_s16() {
-        let a: i16x4 = i16x4::new(1, 1, 1, 4);
-        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a)));
+    unsafe fn test_vmla_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(3, 3, 3, 3);
+        let e: u16x4 = u16x4::new(6, 7, 8, 9);
+        let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_s32() {
-        let a: i32x2 = i32x2::new(1, 1);
-        let e: i32x4 = i32x4::new(1, 1, 1, 1);
-        let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a)));
+    unsafe fn test_vmlaq_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_u8() {
-        let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a)));
+    unsafe fn test_vmla_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(3, 3);
+        let e: u32x2 = u32x2::new(6, 7);
+        let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_u8() {
-        let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a)));
+    unsafe fn test_vmlaq_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32x4 = u32x4::new(3, 3, 3, 3);
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_u16() {
-        let a: u16x4 = u16x4::new(1, 1, 1, 4);
-        let e: u16x4 = u16x4::new(1, 1, 1, 1);
-        let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a)));
+    unsafe fn test_vmla_f32() {
+        let a: f32x2 = f32x2::new(0., 1.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x2 = f32x2::new(3., 3.);
+        let e: f32x2 = f32x2::new(6., 7.);
+        let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_u16() {
-        let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a)));
+    unsafe fn test_vmlaq_f32() {
+        let a: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x4 = f32x4::new(3., 3., 3., 3.);
+        let e: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_u32() {
-        let a: u32x2 = u32x2::new(1, 1);
-        let e: u32x2 = u32x2::new(1, 1);
-        let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a)));
+    unsafe fn test_vmla_n_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i16x4 = i16x4::new(6, 7, 8, 9);
+        let r: i16x4 = transmute(vmla_n_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_u32() {
-        let a: u32x4 = u32x4::new(1, 1, 1, 4);
-        let e: u32x4 = u32x4::new(1, 1, 1, 1);
-        let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a)));
+    unsafe fn test_vmlaq_n_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlaq_n_s16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_u8() {
-        let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
-        let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a)));
+    unsafe fn test_vmla_n_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32 = 3;
+        let e: i32x2 = i32x2::new(6, 7);
+        let r: i32x2 = transmute(vmla_n_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_u16() {
-        let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: u16x4 = u16x4::new(1, 1, 1, 1);
-        let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a)));
+    unsafe fn test_vmlaq_n_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32 = 3;
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlaq_n_s32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_u32() {
-        let a: u32x4 = u32x4::new(1, 1, 1, 4);
-        let e: u32x2 = u32x2::new(1, 1);
-        let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a)));
+    unsafe fn test_vmla_n_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u16x4 = u16x4::new(6, 7, 8, 9);
+        let r: u16x4 = transmute(vmla_n_u16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_u8() {
-        let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a)));
+    unsafe fn test_vmlaq_n_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vmlaq_n_u16(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_u16() {
-        let a: u16x4 = u16x4::new(1, 1, 1, 4);
-        let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: u16x8 = transmute(vdupq_lane_u16::<2>(transmute(a)));
+    unsafe fn test_vmla_n_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32 = 3;
+        let e: u32x2 = u32x2::new(6, 7);
+        let r: u32x2 = transmute(vmla_n_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_u32() {
-        let a: u32x2 = u32x2::new(1, 1);
-        let e: u32x4 = u32x4::new(1, 1, 1, 1);
-        let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a)));
+    unsafe fn test_vmlaq_n_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32 = 3;
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlaq_n_u32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_p8() {
-        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a)));
+    unsafe fn test_vmla_n_f32() {
+        let a: f32x2 = f32x2::new(0., 1.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32 = 3.;
+        let e: f32x2 = f32x2::new(6., 7.);
+        let r: f32x2 = transmute(vmla_n_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_p8() {
-        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
-        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a)));
+    unsafe fn test_vmlaq_n_f32() {
+        let a: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32 = 3.;
+        let e: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let r: f32x4 = transmute(vmlaq_n_f32(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_lane_p16() {
-        let a: i16x4 = i16x4::new(1, 1, 1, 4);
-        let e: i16x4 = i16x4::new(1, 1, 1, 1);
-        let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a)));
+    unsafe fn test_vmla_lane_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i16x4 = i16x4::new(6, 7, 8, 9);
+        let r: i16x4 = transmute(vmla_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_laneq_p16() {
-        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a)));
+    unsafe fn test_vmla_laneq_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i16x4 = i16x4::new(6, 7, 8, 9);
+        let r: i16x4 = transmute(vmla_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_p8() {
-        let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16);
-        let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a)));
+    unsafe fn test_vmlaq_lane_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlaq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_laneq_p16() {
-        let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i16x4 = i16x4::new(1, 1, 1, 1);
-        let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a)));
+    unsafe fn test_vmlaq_laneq_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlaq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_p8() {
-        let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8);
-        let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a)));
+    unsafe fn test_vmla_lane_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(0, 3);
+        let e: i32x2 = i32x2::new(6, 7);
+        let r: i32x2 = transmute(vmla_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_lane_p16() {
-        let a: i16x4 = i16x4::new(1, 1, 1, 4);
-        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
-        let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a)));
+    unsafe fn test_vmla_laneq_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x4 = i32x4::new(0, 3, 0, 0);
+        let e: i32x2 = i32x2::new(6, 7);
+        let r: i32x2 = transmute(vmla_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_s64() { - let a: i64x2 = i64x2::new(1, 1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a))); + unsafe fn test_vmlaq_lane_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_s64() { - let a: i64x1 = i64x1::new(1); - let e: i64x2 = i64x2::new(1, 1); - let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a))); + unsafe fn test_vmlaq_laneq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_u64() { - let a: u64x2 = u64x2::new(1, 1); - let e: u64x2 = u64x2::new(1, 1); - let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a))); + unsafe fn test_vmla_lane_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_u64() { - let a: u64x1 = u64x1::new(1); - let e: u64x2 = u64x2::new(1, 1); - let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a))); + unsafe fn test_vmla_laneq_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32x2 = f32x2::new(1., 1.); - let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a))); + unsafe fn test_vmlaq_lane_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32x4 = f32x4::new(1., 1., 1., 1.); - let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a))); + unsafe fn test_vmlaq_laneq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_f32() { - let a: f32x4 = f32x4::new(1., 1., 1., 4.); - let e: f32x2 = f32x2::new(1., 1.); - let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a))); + unsafe fn test_vmla_lane_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + 
let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdupq_lane_f32() { - let a: f32x2 = f32x2::new(1., 1.); - let e: f32x4 = f32x4::new(1., 1., 1., 1.); - let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a))); + unsafe fn test_vmla_laneq_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_s64() { - let a: i64x1 = i64x1::new(0); - let e: i64x1 = i64x1::new(0); - let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a))); + unsafe fn test_vmlaq_lane_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_lane_u64() { - let a: u64x1 = u64x1::new(0); - let e: u64x1 = u64x1::new(0); - let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a))); + unsafe fn test_vmlaq_laneq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_s64() { - let a: i64x2 = i64x2::new(0, 1); - let e: i64x1 = i64x1::new(1); - let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a))); + unsafe fn test_vmla_lane_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(0., 3.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vdup_laneq_u64() { - let a: u64x2 = u64x2::new(0, 1); - let e: u64x1 = u64x1::new(1); - let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a))); + unsafe fn test_vmla_laneq_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_s8() { - let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: i8x8 = transmute(vext_s8::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlaq_lane_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x2 = f32x2::new(0., 3.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_s8() { - let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); - let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); - let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 
18, 19); - let r: i8x16 = transmute(vextq_s8::<8>(transmute(a), transmute(b))); + unsafe fn test_vmlaq_laneq_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_s16() { - let a: i16x4 = i16x4::new(0, 8, 8, 9); - let b: i16x4 = i16x4::new(9, 11, 14, 15); - let e: i16x4 = i16x4::new(8, 9, 9, 11); - let r: i16x4 = transmute(vext_s16::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_s8() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_s16() { - let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: i16x8 = transmute(vextq_s16::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(3, 3, 3, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_s32() { - let a: i32x2 = i32x2::new(0, 8); - let b: i32x2 = i32x2::new(9, 11); - let e: i32x2 = i32x2::new(8, 9); - let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(3, 3); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } - - #[simd_test(enable = "neon")] - unsafe fn test_vextq_s32() { - let a: i32x4 = i32x4::new(0, 8, 8, 9); - let b: i32x4 = i32x4::new(9, 11, 14, 15); - let e: i32x4 = i32x4::new(8, 9, 9, 11); - let r: i32x4 = transmute(vextq_s32::<2>(transmute(a), transmute(b))); + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_u8() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_u8() { - let a: u8x8 = u8x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: u8x8 = u8x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: u8x8 = u8x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: u8x8 = transmute(vext_u8::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(3, 3, 3, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u8() { - let a: u8x16 = u8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); - let b: u8x16 = u8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 
8, 9, 8, 9, 9, 11); - let e: u8x16 = u8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); - let r: u8x16 = transmute(vextq_u8::<8>(transmute(a), transmute(b))); + unsafe fn test_vmlal_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(3, 3); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_u16() { - let a: u16x4 = u16x4::new(0, 8, 8, 9); - let b: u16x4 = u16x4::new(9, 11, 14, 15); - let e: u16x4 = u16x4::new(8, 9, 9, 11); - let r: u16x4 = transmute(vext_u16::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_n_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_n_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u16() { - let a: u16x8 = u16x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: u16x8 = u16x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: u16x8 = u16x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: u16x8 = transmute(vextq_u16::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_n_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_u32() { - let a: u32x2 = u32x2::new(0, 8); - let b: u32x2 = u32x2::new(9, 11); - let e: u32x2 = u32x2::new(8, 9); - let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_n_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u32() { - let a: u32x4 = u32x4::new(0, 8, 8, 9); - let b: u32x4 = u32x4::new(9, 11, 14, 15); - let e: u32x4 = u32x4::new(8, 9, 9, 11); - let r: u32x4 = transmute(vextq_u32::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_n_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_p8() { - let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: i8x8 = transmute(vext_p8::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_p8() { - let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); - let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); - let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); - let r: i8x16 = 
transmute(vextq_p8::<8>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_p16() { - let a: i16x4 = i16x4::new(0, 8, 8, 9); - let b: i16x4 = i16x4::new(9, 11, 14, 15); - let e: i16x4 = i16x4::new(8, 9, 9, 11); - let r: i16x4 = transmute(vext_p16::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_p16() { - let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); - let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); - let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); - let r: i16x8 = transmute(vextq_p16::<4>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_s64() { - let a: i64x2 = i64x2::new(0, 8); - let b: i64x2 = i64x2::new(9, 11); - let e: i64x2 = i64x2::new(8, 9); - let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_u64() { - let a: u64x2 = u64x2::new(0, 8); - let b: u64x2 = u64x2::new(9, 11); - let e: u64x2 = u64x2::new(8, 9); - let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vext_f32() { - let a: f32x2 = f32x2::new(0., 2.); - let b: f32x2 = f32x2::new(3., 4.); - let e: f32x2 = f32x2::new(2., 3.); - let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b))); + unsafe fn test_vmlal_lane_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vextq_f32() { - let a: f32x4 = f32x4::new(0., 2., 2., 3.); - let b: f32x4 = f32x4::new(3., 4., 5., 6.); - let e: f32x4 = f32x4::new(2., 3., 3., 4.); - let r: f32x4 = transmute(vextq_f32::<2>(transmute(a), transmute(b))); + unsafe fn test_vmlal_laneq_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: 
u32x4 = u32x4::new(0, 3, 0, 0); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_s8() { - let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmls_s8() { + let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c))); + let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s8() { - let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + unsafe fn test_vmlsq_s8() { + let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c))); + let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_s16() { - let a: i16x4 = i16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i16x4 = i16x4::new(6, 7, 8, 9); - let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c))); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s16() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c))); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_s32() { - let a: i32x2 = i32x2::new(0, 1); + unsafe fn test_vmls_s32() { + let a: i32x2 = i32x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); let c: i32x2 = i32x2::new(3, 3); - let e: i32x2 = i32x2::new(6, 7); - let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c))); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_s32() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i32x4 = i32x4::new(2, 2, 2, 2); let c: i32x4 = i32x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c))); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = 
transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_u8() { - let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmls_u8() { + let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c))); + let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u8() { - let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + unsafe fn test_vmlsq_u8() { + let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c))); + let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_u16() { - let a: u16x4 = u16x4::new(0, 1, 2, 3); + unsafe fn test_vmls_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u16x4 = u16x4::new(6, 7, 8, 9); - let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c))); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u16() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + unsafe fn test_vmlsq_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c))); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmla_u32() { - let a: u32x2 = u32x2::new(0, 1); + unsafe fn test_vmls_u32() { + let a: u32x2 = u32x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); let c: u32x2 = u32x2::new(3, 3); - let e: u32x2 = u32x2::new(6, 7); - let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c))); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_u32() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); + unsafe fn test_vmlsq_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u32x4 = u32x4::new(2, 2, 2, 2); let c: u32x4 = u32x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c))); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - 
unsafe fn test_vmla_f32() { - let a: f32x2 = f32x2::new(0., 1.); + unsafe fn test_vmls_f32() { + let a: f32x2 = f32x2::new(6., 7.); let b: f32x2 = f32x2::new(2., 2.); let c: f32x2 = f32x2::new(3., 3.); - let e: f32x2 = f32x2::new(6., 7.); - let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c))); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlaq_f32() { - let a: f32x4 = f32x4::new(0., 1., 2., 3.); + unsafe fn test_vmlsq_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); let b: f32x4 = f32x4::new(2., 2., 2., 2.); let c: f32x4 = f32x4::new(3., 3., 3., 3.); - let e: f32x4 = f32x4::new(6., 7., 8., 9.); - let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c))); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_n_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16 = 3; + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_n_s32() { + let a: i32x2 = i32x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s8() { - let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmlsq_n_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32 = 3; + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_n_s32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s16() { - let a: i32x4 = i32x4::new(0, 1, 2, 3); - let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 = i16x4::new(3, 3, 3, 3); - let e: i32x4 = i32x4::new(6, 7, 8, 9); - let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmls_n_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_s32() { - let a: i64x2 = i64x2::new(0, 1); - let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); - let e: i64x2 = i64x2::new(6, 7); - let r: i64x2 = transmute(vmlal_s32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmlsq_n_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: 
u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16 = 3; + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_n_u16(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u8() { - let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmls_n_u32() { + let a: u32x2 = u32x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u16() { - let a: u32x4 = u32x4::new(0, 1, 2, 3); - let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); - let e: u32x4 = u32x4::new(6, 7, 8, 9); - let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmlsq_n_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32 = 3; + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_n_u32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlal_u32() { - let a: u64x2 = u64x2::new(0, 1); - let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); - let e: u64x2 = u64x2::new(6, 7); - let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmls_n_f32() { + let a: f32x2 = f32x2::new(6., 7.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32 = 3.; + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_n_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_s8() { - let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmlsq_n_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32 = 3.; + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_n_f32(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_s8() { - let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmls_lane_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_s16() { + unsafe fn test_vmls_laneq_s16() { let a: i16x4 = i16x4::new(6, 7, 8, 9); let b: i16x4 = i16x4::new(2, 2, 2, 2); - let c: i16x4 
= i16x4::new(3, 3, 3, 3); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); let e: i16x4 = i16x4::new(0, 1, 2, 3); - let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c))); + let r: i16x4 = transmute(vmls_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_s16() { + unsafe fn test_vmlsq_lane_s16() { let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let c: i16x4 = i16x4::new(0, 3, 0, 0); let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c))); + let r: i16x8 = transmute(vmlsq_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_s32() { + unsafe fn test_vmlsq_laneq_s16() { + let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vmlsq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_s32() { let a: i32x2 = i32x2::new(6, 7); let b: i32x2 = i32x2::new(2, 2); - let c: i32x2 = i32x2::new(3, 3); + let c: i32x2 = i32x2::new(0, 3); let e: i32x2 = i32x2::new(0, 1); - let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c))); + let r: i32x2 = transmute(vmls_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_s32() { + unsafe fn test_vmls_laneq_s32() { + let a: i32x2 = i32x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x2 = i32x2::new(0, 1); + let r: i32x2 = transmute(vmls_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_s32() { let a: i32x4 = i32x4::new(6, 7, 8, 9); let b: i32x4 = i32x4::new(2, 2, 2, 2); - let c: i32x4 = i32x4::new(3, 3, 3, 3); + let c: i32x2 = i32x2::new(0, 3); let e: i32x4 = i32x4::new(0, 1, 2, 3); - let r: i32x4 = transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c))); + let r: i32x4 = transmute(vmlsq_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_u8() { - let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); - let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmlsq_laneq_s32() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_u8() { - let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); - let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); - let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15); - let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c))); + unsafe fn test_vmls_lane_u16() { + let a: u16x4 = u16x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u16x4 = u16x4::new(0, 1, 2, 3); + let r: u16x4 = transmute(vmls_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_u16() { + unsafe fn test_vmls_laneq_u16() { let a: u16x4 = u16x4::new(6, 7, 8, 9); let b: u16x4 = u16x4::new(2, 2, 2, 2); - let c: u16x4 = u16x4::new(3, 3, 3, 3); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); let e: u16x4 = u16x4::new(0, 1, 2, 3); - let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c))); + let r: u16x4 = transmute(vmls_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_u16() { + unsafe fn test_vmlsq_lane_u16() { let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let c: u16x4 = u16x4::new(0, 3, 0, 0); let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c))); + let r: u16x8 = transmute(vmlsq_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_u32() { + unsafe fn test_vmlsq_laneq_u16() { + let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: u16x8 = transmute(vmlsq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_u32() { let a: u32x2 = u32x2::new(6, 7); let b: u32x2 = u32x2::new(2, 2); - let c: u32x2 = u32x2::new(3, 3); + let c: u32x2 = u32x2::new(0, 3); let e: u32x2 = u32x2::new(0, 1); - let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c))); + let r: u32x2 = transmute(vmls_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_u32() { + unsafe fn test_vmls_laneq_u32() { + let a: u32x2 = u32x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x2 = u32x2::new(0, 1); + let r: u32x2 = transmute(vmls_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_u32() { let a: u32x4 = u32x4::new(6, 7, 8, 9); let b: u32x4 = u32x4::new(2, 2, 2, 2); - let c: u32x4 = u32x4::new(3, 3, 3, 3); + let c: u32x2 = u32x2::new(0, 3); let e: u32x4 = u32x4::new(0, 1, 2, 3); - let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c))); + let r: u32x4 = transmute(vmlsq_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmls_f32() { + unsafe fn test_vmlsq_laneq_u32() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_lane_f32() { 
let a: f32x2 = f32x2::new(6., 7.); let b: f32x2 = f32x2::new(2., 2.); - let c: f32x2 = f32x2::new(3., 3.); + let c: f32x2 = f32x2::new(0., 3.); let e: f32x2 = f32x2::new(0., 1.); - let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c))); + let r: f32x2 = transmute(vmls_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vmlsq_f32() { + unsafe fn test_vmls_laneq_f32() { + let a: f32x2 = f32x2::new(6., 7.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x2 = f32x2::new(0., 1.); + let r: f32x2 = transmute(vmls_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_lane_f32() { let a: f32x4 = f32x4::new(6., 7., 8., 9.); let b: f32x4 = f32x4::new(2., 2., 2., 2.); - let c: f32x4 = f32x4::new(3., 3., 3., 3.); + let c: f32x2 = f32x2::new(0., 3.); let e: f32x4 = f32x4::new(0., 1., 2., 3.); - let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c))); + let r: f32x4 = transmute(vmlsq_lane_f32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_laneq_f32() { + let a: f32x4 = f32x4::new(6., 7., 8., 9.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(0., 3., 0., 0.); + let e: f32x4 = f32x4::new(0., 1., 2., 3.); + let r: f32x4 = transmute(vmlsq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c))); assert_eq!(r, e); } @@ -14481,6 +16153,126 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_s32() { + let a: i64x2 = i64x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_n_u32() { + let a: u64x2 = u64x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(0, 3, 0, 0); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_lane_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_s16() { + let a: i32x4 = i32x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(0, 1, 2, 3); + let r: i32x4 = transmute(vmlsl_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_s32() { + let a: i64x2 = i64x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_s32() { + let a: i64x2 = i64x2::new(6, 7); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_u16() { + let a: u32x4 = u32x4::new(6, 7, 8, 9); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(0, 1, 2, 3); + let r: u32x4 = transmute(vmlsl_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_lane_u32() { + let a: u64x2 = u64x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsl_laneq_u32() { + let a: u64x2 = u64x2::new(6, 7); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vmlsl_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vneg_s8() { let a: i8x8 = i8x8::new(0, 1, -1, 2, -2, 3, -3, 4); diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index 5e1012fa1e..b0eea59861 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -37,7 +37,8 @@ external_doc, allow_internal_unstable, decl_macro, - extended_key_value_attributes + extended_key_value_attributes, + bench_black_box )] #![cfg_attr(test, feature(test, abi_vectorcall))] #![cfg_attr(all(test, target_arch = "wasm32"), feature(wasm_simd))] diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec index 0de37ad219..f0b7448cc3 100644 --- a/crates/stdarch-gen/neon.spec +++ b/crates/stdarch-gen/neon.spec @@ -1222,6 +1222,68 @@ generate float64x*_t arm = vmla. generate float*_t +/// Vector multiply accumulate with scalar +name = vmla +n-suffix +multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c} +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +c = 3 +validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 + +aarch64 = mla +arm = vmla. 
+generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
+generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
+
+/// Vector multiply accumulate with scalar
+name = vmla
+n-suffix
+multi_fn = vmla-self-noext, a, b, {vdup-nself-noext, c}
+a = 0., 1., 2., 3.
+b = 2., 2., 2., 2.
+c = 3.
+validate 6., 7., 8., 9.
+
+aarch64 = fmul
+arm = vmla.
+generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
+
+/// Vector multiply accumulate with scalar
+name = vmla
+in2-lane-suffixes
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1
+validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+
+aarch64 = mla
+arm = vmla.
+generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
+generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
+generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
+generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
+
+/// Vector multiply accumulate with scalar
+name = vmla
+in2-lane-suffixes
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmla-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 0., 1., 2., 3.
+b = 2., 2., 2., 2.
+c = 0., 3., 0., 0.
+n = 1
+validate 6., 7., 8., 9.
+
+aarch64 = fmul
+arm = vmla.
+generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
+
 /// Signed multiply-add long
 name = vmlal
 multi_fn = simd_add, a, {vmull-self-noext, b, c}
@@ -1246,6 +1308,41 @@ arm = vmlal.s
 aarch64 = umlal
 generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
 
+/// Vector widening multiply accumulate with scalar
+name = vmlal
+n-suffix
+multi_fn = vmlal-self-noext, a, b, {vdup-nself-noext, c}
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 3
+validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+
+arm = vmlal.s
+aarch64 = smlal
+generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
+aarch64 = umlal
+generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
+
+/// Vector widening multiply accumulate with scalar
+name = vmlal_lane
+in2-suffix
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmlal-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1
+validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+
+arm = vmlal.s
+aarch64 = smlal
+generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
+generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
+aarch64 = umlal
+generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
+generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
+
 /// Signed multiply-add long
 name = vmlal_high
 no-q
@@ -1276,6 +1373,39 @@ validate 8, 9, 10, 11, 12, 13, 14, 15
 aarch64 = umlal2
 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
 
+/// Multiply-add long
+name = vmlal_high_n
+no-q
+multi_fn = vmlal_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
+a = 8, 7, 6, 5, 4, 3, 2, 1
+b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
+c = 2
+validate 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = smlal2
+generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
+aarch64 = umlal2
+generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
+
+/// Multiply-add long
+name = vmlal_high_lane
+in2-suffix
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmlal_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 8, 7, 6, 5, 4, 3, 2, 1
+b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
+c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1
+validate 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = smlal2
+generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
+generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
+aarch64 = umlal2
+generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
+generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
+
 /// Multiply-subtract from accumulator
 name = vmls
 multi_fn = simd_sub, a, {simd_mul, b, c}
@@ -1302,6 +1432,68 @@ generate float64x*_t
 arm = vmls.
 generate float*_t
 
+/// Vector multiply subtract with scalar
+name = vmls
+n-suffix
+multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
+a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 3
+validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = mls
+arm = vmls.
+generate int16x4_t:int16x4_t:i16:int16x4_t, int16x8_t:int16x8_t:i16:int16x8_t, int32x2_t:int32x2_t:i32:int32x2_t, int32x4_t:int32x4_t:i32:int32x4_t
+generate uint16x4_t:uint16x4_t:u16:uint16x4_t, uint16x8_t:uint16x8_t:u16:uint16x8_t, uint32x2_t:uint32x2_t:u32:uint32x2_t, uint32x4_t:uint32x4_t:u32:uint32x4_t
+
+/// Vector multiply subtract with scalar
+name = vmls
+n-suffix
+multi_fn = vmls-self-noext, a, b, {vdup-nself-noext, c}
+a = 6., 7., 8., 9.
+b = 2., 2., 2., 2.
+c = 3.
+validate 0., 1., 2., 3.
+
+aarch64 = fmul
+arm = vmls.
+generate float32x2_t:float32x2_t:f32:float32x2_t, float32x4_t:float32x4_t:f32:float32x4_t
+
+/// Vector multiply subtract with scalar
+name = vmls
+in2-lane-suffixes
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1
+validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = mls
+arm = vmls.
+generate int16x4_t, int16x4_t:int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x8_t:int16x4_t:int16x8_t, int16x8_t
+generate int32x2_t, int32x2_t:int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x4_t:int32x2_t:int32x4_t, int32x4_t
+generate uint16x4_t, uint16x4_t:uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
+generate uint32x2_t, uint32x2_t:uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
+
+/// Vector multiply subtract with scalar
+name = vmls
+in2-lane-suffixes
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmls-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 6., 7., 8., 9.
+b = 2., 2., 2., 2.
+c = 0., 3., 0., 0.
+n = 1
+validate 0., 1., 2., 3.
+
+aarch64 = fmul
+arm = vmls.
+generate float32x2_t, float32x2_t:float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x4_t:float32x2_t:float32x4_t, float32x4_t
+
 /// Signed multiply-subtract long
 name = vmlsl
 multi_fn = simd_sub, a, {vmull-self-noext, b, c}
@@ -1314,7 +1506,7 @@ arm = vmlsl.s
 aarch64 = smlsl
 generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
 
-/// Signed multiply-subtract long
+/// Unsigned multiply-subtract long
 name = vmlsl
 multi_fn = simd_sub, a, {vmull-self-noext, b, c}
 a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
@@ -1326,6 +1518,41 @@ arm = vmlsl.s
 aarch64 = umlsl
 generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
 
+/// Vector widening multiply subtract with scalar
+name = vmlsl
+n-suffix
+multi_fn = vmlsl-self-noext, a, b, {vdup-nself-noext, c}
+a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 3
+validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+
+arm = vmlsl.s
+aarch64 = smlsl
+generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
+aarch64 = umlsl
+generate uint32x4_t:uint16x4_t:u16:uint32x4_t, uint64x2_t:uint32x2_t:u32:uint64x2_t
+
+/// Vector widening multiply subtract with scalar
+name = vmlsl_lane
+in2-suffix
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmlsl-self-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
+b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
+c = 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1
+validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+
+arm = vmlsl.s
+aarch64 = smlsl
+generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int32x4_t:int16x4_t:int16x8_t:int32x4_t
+generate int64x2_t:int32x2_t:int32x2_t:int64x2_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
+aarch64 = umlsl
+generate uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x4_t:uint16x8_t:uint32x4_t
+generate uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x2_t:uint32x4_t:uint64x2_t
+
 /// Signed multiply-subtract long
 name = vmlsl_high
 no-q
@@ -1356,6 +1583,39 @@ validate 14, 13, 12, 11, 10, 9, 8, 7
 aarch64 = umlsl2
 generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
 
+/// Multiply-subtract long
+name = vmlsl_high_n
+no-q
+multi_fn = vmlsl_high-noqself-noext, a, b, {vdupq_n-noqself-noext, c}
+a = 14, 15, 16, 17, 18, 19, 20, 21
+b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
+c = 2
+validate 14, 13, 12, 11, 10, 9, 8, 7
+
+aarch64 = smlsl2
+generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
+aarch64 = umlsl2
+generate uint32x4_t:uint16x8_t:u16:uint32x4_t, uint64x2_t:uint32x4_t:u32:uint64x2_t
+
+/// Multiply-subtract long
+name = vmlsl_high_lane
+in2-suffix
+constn = LANE
+multi_fn = static_assert_imm-in2_exp_len-LANE
+multi_fn = vmlsl_high-noqself-noext, a, b, {simd_shuffle-in_len-noext, c, c, {dup-in_len-LANE as u32}}
+a = 14, 15, 16, 17, 18, 19, 20, 21
+b = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
+c = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+n = 1
+validate 14, 13, 12, 11, 10, 9, 8, 7
+
+aarch64 = smlsl2
+generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t
+generate int64x2_t:int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
+aarch64 = umlsl2
+generate uint32x4_t:uint16x8_t:uint16x4_t:uint32x4_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t
+generate uint64x2_t:uint32x4_t:uint32x2_t:uint64x2_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
+
 /// Extract narrow
 name = vmovn_high
 no-q
diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
index f659bab99b..5a905d92fe 100644
--- a/crates/stdarch-gen/src/main.rs
+++ b/crates/stdarch-gen/src/main.rs
@@ -349,6 +349,7 @@ enum Suffix {
     OutSuffix,
     Lane,
     In2,
+    In2Lane,
 }
 
 #[derive(Clone, Copy)]
@@ -847,6 +848,7 @@ fn gen_aarch64(
         OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
         Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
         In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
+        In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
     };
     let current_fn = if let Some(current_fn) = current_fn.clone() {
         if link_aarch64.is_some() {
@@ -1259,6 +1261,7 @@ fn gen_arm(
         OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
         Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
         In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
+        In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
     };
     let current_aarch64 = current_aarch64
         .clone()
@@ -2216,6 +2219,8 @@ mod test {
             suffix = Lane;
         } else if line.starts_with("in2-suffix") {
             suffix = In2;
+        } else if line.starts_with("in2-lane-suffixes") {
+            suffix = In2Lane;
         } else if line.starts_with("a = ") {
             a = line[4..].split(',').map(|v| v.trim().to_string()).collect();
         } else if line.starts_with("b = ") {
diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs
index 8f6aa4a267..cc48a65b25 100644
--- a/crates/stdarch-test/src/lib.rs
+++ b/crates/stdarch-test/src/lib.rs
@@ -3,7 +3,7 @@
 //! This basically just disassembles the current executable and then parses the
 //! output once globally and then provides the `assert` function which makes
 //! assertions about the disassembly of a function.
-#![feature(test)] // For black_box
+#![feature(bench_black_box)] // For black_box
 #![deny(rust_2018_idioms)]
 #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
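
As an illustrative aside, not part of the patch itself: a minimal sketch of how one of the lane intrinsics generated above might be called. It assumes a NEON-capable aarch64 nightly toolchain on which `core::arch::aarch64` exposes these functions; `mla_lane_demo` is a hypothetical helper name.

    // Hypothetical usage sketch: vmla_lane_s16 computes a + b * c[LANE] per element.
    #[cfg(target_arch = "aarch64")]
    unsafe fn mla_lane_demo() {
        use core::arch::aarch64::*;
        let a = vdup_n_s16(1);                        // accumulator   [1, 1, 1, 1]
        let b = vdup_n_s16(2);                        // multiplicand  [2, 2, 2, 2]
        let c = vset_lane_s16::<1>(3, vdup_n_s16(0)); // lane source   [0, 3, 0, 0]
        let r = vmla_lane_s16::<1>(a, b, c);          // 1 + 2 * 3  -> [7, 7, 7, 7]
        assert_eq!(vget_lane_s16::<0>(r), 7);
    }

The `LANE` const generic is range-checked at compile time by the `static_assert_imm*` macros emitted from the spec entries above.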