Skip to content

Commit 06a170b

Browse files
committed
Upgrade more intrinsics to the new version
1 parent: 74e454d; commit: 06a170b

File tree

7 files changed, with 54 additions (+54) and 24 deletions (-24).

7 files changed, with 54 additions (+54) and 24 deletions (-24).

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21420,7 +21420,7 @@ pub fn vrbit_s8(a: int8x8_t) -> int8x8_t {
2142021420
unsafe extern "unadjusted" {
2142121421
#[cfg_attr(
2142221422
any(target_arch = "aarch64", target_arch = "arm64ec"),
21423-
link_name = "llvm.aarch64.neon.rbit.v8i8"
21423+
link_name = "llvm.bitreverse.v8i8"
2142421424
)]
2142521425
fn _vrbit_s8(a: int8x8_t) -> int8x8_t;
2142621426
}
@@ -21436,7 +21436,7 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
2143621436
unsafe extern "unadjusted" {
2143721437
#[cfg_attr(
2143821438
any(target_arch = "aarch64", target_arch = "arm64ec"),
21439-
link_name = "llvm.aarch64.neon.rbit.v16i8"
21439+
link_name = "llvm.bitreverse.v16i8"
2144021440
)]
2144121441
fn _vrbitq_s8(a: int8x16_t) -> int8x16_t;
2144221442
}
@@ -23871,7 +23871,7 @@ pub fn vrndn_f64(a: float64x1_t) -> float64x1_t {
2387123871
unsafe extern "unadjusted" {
2387223872
#[cfg_attr(
2387323873
any(target_arch = "aarch64", target_arch = "arm64ec"),
23874-
link_name = "llvm.aarch64.neon.frintn.v1f64"
23874+
link_name = "llvm.roundeven.v1f64"
2387523875
)]
2387623876
fn _vrndn_f64(a: float64x1_t) -> float64x1_t;
2387723877
}
@@ -23887,7 +23887,7 @@ pub fn vrndnq_f64(a: float64x2_t) -> float64x2_t {
2388723887
unsafe extern "unadjusted" {
2388823888
#[cfg_attr(
2388923889
any(target_arch = "aarch64", target_arch = "arm64ec"),
23890-
link_name = "llvm.aarch64.neon.frintn.v2f64"
23890+
link_name = "llvm.roundeven.v2f64"
2389123891
)]
2389223892
fn _vrndnq_f64(a: float64x2_t) -> float64x2_t;
2389323893
}

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ pub fn __crc32w(crc: u32, data: u32) -> u32 {
204204
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s8)"]
205205
#[inline]
206206
#[target_feature(enable = "neon")]
207+
#[cfg(target_arch = "arm")]
207208
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
208209
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
209210
#[cfg_attr(
@@ -221,6 +222,7 @@ fn priv_vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
221222
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s8)"]
222223
#[inline]
223224
#[target_feature(enable = "neon")]
225+
#[cfg(target_arch = "arm")]
224226
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
225227
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
226228
#[cfg_attr(
@@ -238,6 +240,7 @@ fn priv_vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
238240
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s16)"]
239241
#[inline]
240242
#[target_feature(enable = "neon")]
243+
#[cfg(target_arch = "arm")]
241244
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
242245
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
243246
#[cfg_attr(
@@ -255,6 +258,7 @@ fn priv_vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
255258
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s16)"]
256259
#[inline]
257260
#[target_feature(enable = "neon")]
261+
#[cfg(target_arch = "arm")]
258262
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
259263
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
260264
#[cfg_attr(
@@ -272,6 +276,7 @@ fn priv_vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
272276
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_s32)"]
273277
#[inline]
274278
#[target_feature(enable = "neon")]
279+
#[cfg(target_arch = "arm")]
275280
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
276281
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
277282
#[cfg_attr(
@@ -289,6 +294,7 @@ fn priv_vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
289294
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_s32)"]
290295
#[inline]
291296
#[target_feature(enable = "neon")]
297+
#[cfg(target_arch = "arm")]
292298
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
293299
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
294300
#[cfg_attr(
@@ -306,6 +312,7 @@ fn priv_vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
306312
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u8)"]
307313
#[inline]
308314
#[target_feature(enable = "neon")]
315+
#[cfg(target_arch = "arm")]
309316
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
310317
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
311318
#[cfg_attr(
@@ -323,6 +330,7 @@ fn priv_vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
323330
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u8)"]
324331
#[inline]
325332
#[target_feature(enable = "neon")]
333+
#[cfg(target_arch = "arm")]
326334
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
327335
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
328336
#[cfg_attr(
@@ -340,6 +348,7 @@ fn priv_vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
340348
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u16)"]
341349
#[inline]
342350
#[target_feature(enable = "neon")]
351+
#[cfg(target_arch = "arm")]
343352
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
344353
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
345354
#[cfg_attr(
@@ -357,6 +366,7 @@ fn priv_vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
357366
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u16)"]
358367
#[inline]
359368
#[target_feature(enable = "neon")]
369+
#[cfg(target_arch = "arm")]
360370
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
361371
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
362372
#[cfg_attr(
@@ -374,6 +384,7 @@ fn priv_vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
374384
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadal_u32)"]
375385
#[inline]
376386
#[target_feature(enable = "neon")]
387+
#[cfg(target_arch = "arm")]
377388
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
378389
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
379390
#[cfg_attr(
@@ -391,6 +402,7 @@ fn priv_vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
391402
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/priv_vpadalq_u32)"]
392403
#[inline]
393404
#[target_feature(enable = "neon")]
405+
#[cfg(target_arch = "arm")]
394406
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
395407
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
396408
#[cfg_attr(
@@ -58712,7 +58724,7 @@ pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
5871258724
unsafe extern "unadjusted" {
5871358725
#[cfg_attr(
5871458726
any(target_arch = "aarch64", target_arch = "arm64ec"),
58715-
link_name = "llvm.aarch64.neon.frintn.v4f16"
58727+
link_name = "llvm.roundeven.v4f16"
5871658728
)]
5871758729
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f16")]
5871858730
fn _vrndn_f16(a: float16x4_t) -> float16x4_t;
@@ -58734,7 +58746,7 @@ pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
5873458746
unsafe extern "unadjusted" {
5873558747
#[cfg_attr(
5873658748
any(target_arch = "aarch64", target_arch = "arm64ec"),
58737-
link_name = "llvm.aarch64.neon.frintn.v8f16"
58749+
link_name = "llvm.roundeven.v8f16"
5873858750
)]
5873958751
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v8f16")]
5874058752
fn _vrndnq_f16(a: float16x8_t) -> float16x8_t;
@@ -58763,7 +58775,7 @@ pub fn vrndn_f32(a: float32x2_t) -> float32x2_t {
5876358775
unsafe extern "unadjusted" {
5876458776
#[cfg_attr(
5876558777
any(target_arch = "aarch64", target_arch = "arm64ec"),
58766-
link_name = "llvm.aarch64.neon.frintn.v2f32"
58778+
link_name = "llvm.roundeven.v2f32"
5876758779
)]
5876858780
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
5876958781
fn _vrndn_f32(a: float32x2_t) -> float32x2_t;
@@ -58792,7 +58804,7 @@ pub fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
5879258804
unsafe extern "unadjusted" {
5879358805
#[cfg_attr(
5879458806
any(target_arch = "aarch64", target_arch = "arm64ec"),
58795-
link_name = "llvm.aarch64.neon.frintn.v4f32"
58807+
link_name = "llvm.roundeven.v4f32"
5879658808
)]
5879758809
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
5879858810
fn _vrndnq_f32(a: float32x4_t) -> float32x4_t;
@@ -61531,6 +61543,8 @@ pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
6153161543
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"]
6153261544
#[inline]
6153361545
#[target_feature(enable = "neon")]
61546+
#[cfg(target_arch = "arm")]
61547+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6153461548
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6153561549
fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
6153661550
unsafe extern "unadjusted" {
@@ -61543,6 +61557,8 @@ fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
6154361557
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"]
6154461558
#[inline]
6154561559
#[target_feature(enable = "neon")]
61560+
#[cfg(target_arch = "arm")]
61561+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6154661562
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6154761563
fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
6154861564
unsafe extern "unadjusted" {
@@ -61555,6 +61571,8 @@ fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
6155561571
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"]
6155661572
#[inline]
6155761573
#[target_feature(enable = "neon")]
61574+
#[cfg(target_arch = "arm")]
61575+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6155861576
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6155961577
fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
6156061578
unsafe extern "unadjusted" {
@@ -61567,6 +61585,8 @@ fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
6156761585
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"]
6156861586
#[inline]
6156961587
#[target_feature(enable = "neon")]
61588+
#[cfg(target_arch = "arm")]
61589+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6157061590
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6157161591
fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
6157261592
unsafe extern "unadjusted" {
@@ -61579,6 +61599,8 @@ fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
6157961599
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"]
6158061600
#[inline]
6158161601
#[target_feature(enable = "neon")]
61602+
#[cfg(target_arch = "arm")]
61603+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6158261604
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6158361605
fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
6158461606
unsafe extern "unadjusted" {
@@ -61591,6 +61613,8 @@ fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
6159161613
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"]
6159261614
#[inline]
6159361615
#[target_feature(enable = "neon")]
61616+
#[cfg(target_arch = "arm")]
61617+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6159461618
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6159561619
fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
6159661620
unsafe extern "unadjusted" {
@@ -61603,6 +61627,8 @@ fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
6160361627
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"]
6160461628
#[inline]
6160561629
#[target_feature(enable = "neon")]
61630+
#[cfg(target_arch = "arm")]
61631+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6160661632
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6160761633
fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
6160861634
unsafe extern "unadjusted" {
@@ -61615,6 +61641,8 @@ fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
6161561641
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"]
6161661642
#[inline]
6161761643
#[target_feature(enable = "neon")]
61644+
#[cfg(target_arch = "arm")]
61645+
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
6161861646
#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
6161961647
fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
6162061648
unsafe extern "unadjusted" {

crates/core_arch/src/x86/avx.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,7 @@ pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
573573
#[stable(feature = "simd_x86", since = "1.27.0")]
574574
pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
575575
static_assert_uimm_bits!(IMM8, 8);
576-
unsafe { vdpps(a, b, IMM8) }
576+
unsafe { vdpps(a, b, IMM8 as i8) }
577577
}
578578

579579
/// Horizontal addition of adjacent pairs in the two packed vectors
@@ -3043,7 +3043,7 @@ unsafe extern "C" {
30433043
#[link_name = "llvm.x86.avx.round.ps.256"]
30443044
fn roundps256(a: __m256, b: i32) -> __m256;
30453045
#[link_name = "llvm.x86.avx.dp.ps.256"]
3046-
fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
3046+
fn vdpps(a: __m256, b: __m256, imm8: i8) -> __m256;
30473047
#[link_name = "llvm.x86.avx.hadd.pd.256"]
30483048
fn vhaddpd(a: __m256d, b: __m256d) -> __m256d;
30493049
#[link_name = "llvm.x86.avx.hadd.ps.256"]

crates/core_arch/src/x86/avx2.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2146,7 +2146,7 @@ pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
21462146
#[stable(feature = "simd_x86", since = "1.27.0")]
21472147
pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
21482148
static_assert_uimm_bits!(IMM8, 8);
2149-
unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8)) }
2149+
unsafe { transmute(mpsadbw(a.as_u8x32(), b.as_u8x32(), IMM8 as i8)) }
21502150
}
21512151

21522152
/// Multiplies the low 32-bit integers from each packed 64-bit element in
@@ -3800,7 +3800,7 @@ unsafe extern "C" {
38003800
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
38013801
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
38023802
#[link_name = "llvm.x86.avx2.mpsadbw"]
3803-
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
3803+
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
38043804
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
38053805
fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
38063806
#[link_name = "llvm.x86.avx2.packsswb"]

crates/core_arch/src/x86/avx512bf16.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ unsafe extern "C" {
2222
#[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
2323
fn cvtneps2bf16_512(a: f32x16) -> i16x16;
2424
#[link_name = "llvm.x86.avx512bf16.dpbf16ps.128"]
25-
fn dpbf16ps(a: f32x4, b: i32x4, c: i32x4) -> f32x4;
25+
fn dpbf16ps(a: f32x4, b: i16x8, c: i16x8) -> f32x4;
2626
#[link_name = "llvm.x86.avx512bf16.dpbf16ps.256"]
27-
fn dpbf16ps_256(a: f32x8, b: i32x8, c: i32x8) -> f32x8;
27+
fn dpbf16ps_256(a: f32x8, b: i16x16, c: i16x16) -> f32x8;
2828
#[link_name = "llvm.x86.avx512bf16.dpbf16ps.512"]
29-
fn dpbf16ps_512(a: f32x16, b: i32x16, c: i32x16) -> f32x16;
29+
fn dpbf16ps_512(a: f32x16, b: i16x32, c: i16x32) -> f32x16;
3030
}
3131

3232
/// Convert packed single-precision (32-bit) floating-point elements in two 128-bit vectors
@@ -250,7 +250,7 @@ pub fn _mm512_maskz_cvtneps_pbh(k: __mmask16, a: __m512) -> __m256bh {
250250
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
251251
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
252252
pub fn _mm_dpbf16_ps(src: __m128, a: __m128bh, b: __m128bh) -> __m128 {
253-
unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i32x4(), b.as_i32x4())) }
253+
unsafe { transmute(dpbf16ps(src.as_f32x4(), a.as_i16x8(), b.as_i16x8())) }
254254
}
255255

256256
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@@ -295,7 +295,7 @@ pub fn _mm_maskz_dpbf16_ps(k: __mmask8, src: __m128, a: __m128bh, b: __m128bh) -
295295
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
296296
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
297297
pub fn _mm256_dpbf16_ps(src: __m256, a: __m256bh, b: __m256bh) -> __m256 {
298-
unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i32x8(), b.as_i32x8())) }
298+
unsafe { transmute(dpbf16ps_256(src.as_f32x8(), a.as_i16x16(), b.as_i16x16())) }
299299
}
300300

301301
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,
@@ -341,7 +341,7 @@ pub fn _mm256_maskz_dpbf16_ps(k: __mmask8, src: __m256, a: __m256bh, b: __m256bh
341341
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
342342
#[cfg_attr(test, assert_instr("vdpbf16ps"))]
343343
pub fn _mm512_dpbf16_ps(src: __m512, a: __m512bh, b: __m512bh) -> __m512 {
344-
unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i32x16(), b.as_i32x16())) }
344+
unsafe { transmute(dpbf16ps_512(src.as_f32x16(), a.as_i16x32(), b.as_i16x32())) }
345345
}
346346

347347
/// Compute dot-product of BF16 (16-bit) floating-point pairs in a and b,

crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2943,7 +2943,7 @@ intrinsics:
29432943
- LLVMLink:
29442944
name: "rbit.{neon_type}"
29452945
links:
2946-
- link: "llvm.aarch64.neon.rbit.{neon_type}"
2946+
- link: "llvm.bitreverse.{neon_type}"
29472947
arch: aarch64,arm64ec
29482948

29492949
- name: "vrbit{neon_type[0].no}"
@@ -3096,7 +3096,7 @@ intrinsics:
30963096
- LLVMLink:
30973097
name: "frintn.{neon_type}"
30983098
links:
3099-
- link: "llvm.aarch64.neon.frintn.{neon_type}"
3099+
- link: "llvm.roundeven.{neon_type}"
31003100
arch: aarch64,arm64ec
31013101

31023102
- name: "vrndns_{type}"

crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2469,7 +2469,7 @@ intrinsics:
24692469
- LLVMLink:
24702470
name: "llvm.frinn.{neon_type}"
24712471
links:
2472-
- link: "llvm.aarch64.neon.frintn.{neon_type}"
2472+
- link: "llvm.roundeven.{neon_type}"
24732473
arch: aarch64,arm64ec
24742474
- link: "llvm.arm.neon.vrintn.{neon_type}"
24752475
arch: arm
@@ -2492,7 +2492,7 @@ intrinsics:
24922492
- LLVMLink:
24932493
name: "llvm.frinn.{neon_type}"
24942494
links:
2495-
- link: "llvm.aarch64.neon.frintn.{neon_type}"
2495+
- link: "llvm.roundeven.{neon_type}"
24962496
arch: aarch64,arm64ec
24972497
- link: "llvm.arm.neon.vrintn.{neon_type}"
24982498
arch: arm
@@ -13202,6 +13202,7 @@ intrinsics:
1320213202
return_type: "{neon_type[0]}"
1320313203
safety: safe
1320413204
attr:
13205+
- *target-is-arm
1320513206
- *neon-v7
1320613207
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
1320713208
- *neon-cfg-arm-unstable
@@ -13227,6 +13228,7 @@ intrinsics:
1322713228
return_type: "{neon_type[0]}"
1322813229
safety: safe
1322913230
attr:
13231+
- *target-is-arm
1323013232
- *neon-v7
1323113233
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
1323213234
- *neon-cfg-arm-unstable
@@ -13812,8 +13814,8 @@ intrinsics:
1381213814
return_type: "{neon_type[1]}"
1381313815
safety: safe
1381413816
attr:
13815-
#- *target-is-arm
13816-
#- *neon-v7
13817+
- *target-is-arm
13818+
- *neon-v7
1381713819
- *neon-arm-unstable
1381813820
types:
1381913821
- ['_v8i8', "int8x8_t", '8']

0 commit comments

Comments
 (0)