Skip to content

Commit 13fe608

Browse files
committed
AVX512FP16 Part 0: Types
1 parent 89f0baa commit 13fe608

File tree

6 files changed

+208
-3
lines changed

6 files changed

+208
-3
lines changed

crates/core_arch/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
target_feature_11,
3535
generic_arg_infer,
3636
asm_experimental_arch,
37-
sha512_sm_x86
37+
sha512_sm_x86,
38+
f16
3839
)]
3940
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
4041
#![deny(clippy::missing_inline_in_public_items)]

crates/core_arch/src/simd.rs

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
#![allow(non_camel_case_types)]
44

55
macro_rules! simd_ty {
6-
($id:ident [$ety:ident]: $($elem_name:ident),*) => {
6+
($(#[$stability:meta])? $id:ident [$ety:ident]: $($elem_name:ident),*) => {
77
#[repr(simd)]
88
#[derive(Copy, Clone, Debug, PartialEq)]
9+
$(#[$stability])?
910
pub(crate) struct $id { $(pub $elem_name: $ety),* }
1011

1112
#[allow(clippy::use_self)]
@@ -186,9 +187,20 @@ simd_ty!(
186187
simd_ty!(i32x4[i32]: x0, x1, x2, x3);
187188
simd_ty!(i64x2[i64]: x0, x1);
188189

190+
simd_ty!(
191+
#[unstable(feature = "f16", issue = "116909")]
192+
f16x8[f16]:
193+
x0,
194+
x1,
195+
x2,
196+
x3,
197+
x4,
198+
x5,
199+
x6,
200+
x7
201+
);
189202
simd_ty!(f32x4[f32]: x0, x1, x2, x3);
190203
simd_ty!(f64x2[f64]: x0, x1);
191-
simd_ty!(f64x4[f64]: x0, x1, x2, x3);
192204

193205
simd_m_ty!(
194206
m8x16[i8]:
@@ -359,6 +371,26 @@ simd_ty!(
359371
);
360372
simd_ty!(i64x4[i64]: x0, x1, x2, x3);
361373

374+
simd_ty!(
375+
#[unstable(feature = "f16", issue = "116909")]
376+
f16x16[f16]:
377+
x0,
378+
x1,
379+
x2,
380+
x3,
381+
x4,
382+
x5,
383+
x6,
384+
x7,
385+
x8,
386+
x9,
387+
x10,
388+
x11,
389+
x12,
390+
x13,
391+
x14,
392+
x15
393+
);
362394
simd_ty!(
363395
f32x8[f32]:
364396
x0,
@@ -370,6 +402,7 @@ simd_ty!(
370402
x6,
371403
x7
372404
);
405+
simd_ty!(f64x4[f64]: x0, x1, x2, x3);
373406

374407
simd_m_ty!(
375408
m8x32[i8]:
@@ -688,6 +721,42 @@ simd_ty!(
688721
x15
689722
);
690723

724+
simd_ty!(
725+
#[unstable(feature = "f16", issue = "116909")]
726+
f16x32[f16]:
727+
x0,
728+
x1,
729+
x2,
730+
x3,
731+
x4,
732+
x5,
733+
x6,
734+
x7,
735+
x8,
736+
x9,
737+
x10,
738+
x11,
739+
x12,
740+
x13,
741+
x14,
742+
x15,
743+
x16,
744+
x17,
745+
x18,
746+
x19,
747+
x20,
748+
x21,
749+
x22,
750+
x23,
751+
x24,
752+
x25,
753+
x26,
754+
x27,
755+
x28,
756+
x29,
757+
x30,
758+
x31
759+
);
691760
simd_ty!(
692761
f32x16[f32]:
693762
x0,

crates/core_arch/src/x86/mod.rs

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,41 @@ types! {
335335
u16, u16, u16, u16, u16, u16, u16, u16,
336336
u16, u16, u16, u16, u16, u16, u16, u16
337337
);
338+
339+
/// 128-bit wide set of 8 `f16` types, x86-specific
340+
///
341+
/// This type is the same as the `__m128h` type defined by Intel,
342+
/// representing a 128-bit SIMD register which internally is consisted of
343+
/// 8 packed `f16` instances. its purpose is for f16 related intrinsic
344+
/// implementations.
345+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
346+
pub struct __m128h(f16, f16, f16, f16, f16, f16, f16, f16);
347+
348+
/// 256-bit wide set of 16 `f16` types, x86-specific
349+
///
350+
/// This type is the same as the `__m256h` type defined by Intel,
351+
/// representing a 256-bit SIMD register which internally is consisted of
352+
/// 16 packed `f16` instances. its purpose is for f16 related intrinsic
353+
/// implementations.
354+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
355+
pub struct __m256h(
356+
f16, f16, f16, f16, f16, f16, f16, f16,
357+
f16, f16, f16, f16, f16, f16, f16, f16
358+
);
359+
360+
/// 512-bit wide set of 32 `f16` types, x86-specific
361+
///
362+
/// This type is the same as the `__m512h` type defined by Intel,
363+
/// representing a 512-bit SIMD register which internally is consisted of
364+
/// 32 packed `f16` instances. its purpose is for f16 related intrinsic
365+
/// implementations.
366+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
367+
pub struct __m512h(
368+
f16, f16, f16, f16, f16, f16, f16, f16,
369+
f16, f16, f16, f16, f16, f16, f16, f16,
370+
f16, f16, f16, f16, f16, f16, f16, f16,
371+
f16, f16, f16, f16, f16, f16, f16, f16
372+
);
338373
}
339374

340375
/// The BFloat16 type used in AVX-512 intrinsics.
@@ -761,6 +796,50 @@ impl m512bhExt for __m512bh {
761796
}
762797
}
763798

799+
#[allow(non_camel_case_types)]
800+
pub(crate) trait m128hExt: Sized {
801+
fn as_m128h(self) -> __m128h;
802+
803+
#[inline]
804+
fn as_f16x8(self) -> crate::core_arch::simd::f16x8 {
805+
unsafe { transmute(self.as_m128h()) }
806+
}
807+
}
808+
809+
impl m128hExt for __m128h {
810+
#[inline]
811+
fn as_m128h(self) -> Self {
812+
self
813+
}
814+
}
815+
816+
#[allow(non_camel_case_types)]
817+
pub(crate) trait m256hExt: Sized {
818+
fn as_m256h(self) -> __m256h;
819+
820+
#[inline]
821+
fn as_f16x16(self) -> crate::core_arch::simd::f16x16 {
822+
unsafe { transmute(self.as_m256h()) }
823+
}
824+
}
825+
826+
impl m256hExt for __m256h {
827+
#[inline]
828+
fn as_m256h(self) -> Self {
829+
self
830+
}
831+
}
832+
833+
#[allow(non_camel_case_types)]
834+
pub(crate) trait m512hExt: Sized {
835+
fn as_m512h(self) -> __m512h;
836+
837+
#[inline]
838+
fn as_f16x32(self) -> crate::core_arch::simd::f16x32 {
839+
unsafe { transmute(self.as_m512h()) }
840+
}
841+
}
842+
764843
mod eflags;
765844
#[stable(feature = "simd_x86", since = "1.27.0")]
766845
pub use self::eflags::*;

crates/core_arch/src/x86/test.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,17 @@ pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 {
3636
transmute::<_, [f32; 4]>(a)[idx]
3737
}
3838

39+
#[track_caller]
40+
#[target_feature(enable = "avx512fp16")]
41+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
42+
pub unsafe fn assert_eq_m128h(a: __m128h, b: __m128h) {
43+
// FIXME: use `_mm_cmp_ph_mask::<_CMP_EQ_OQ>` when it's implemented
44+
let r = _mm_cmpeq_epi16_mask(transmute(a), transmute(b));
45+
if r != 0b1111_1111 {
46+
panic!("{:?} != {:?}", a, b);
47+
}
48+
}
49+
3950
// not actually an intrinsic but useful in various tests as we proted from
4051
// `i64x2::new` which is backwards from `_mm_set_epi64x`
4152
#[target_feature(enable = "sse2")]
@@ -77,6 +88,17 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
7788
transmute::<_, [f32; 8]>(a)[idx]
7889
}
7990

91+
#[track_caller]
92+
#[target_feature(enable = "avx512fp16")]
93+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
94+
pub unsafe fn assert_eq_m256h(a: __m256h, b: __m256h) {
95+
// FIXME: use `_mm256_cmp_ph_mask::<_CMP_EQ_OQ>` when it's implemented
96+
let r = _mm256_cmpeq_epi16_mask(transmute(a), transmute(b));
97+
if r != 0b11111111_11111111 {
98+
panic!("{:?} != {:?}", a, b);
99+
}
100+
}
101+
80102
#[target_feature(enable = "avx512f")]
81103
pub unsafe fn get_m512(a: __m512, idx: usize) -> f32 {
82104
transmute::<_, [f32; 16]>(a)[idx]
@@ -151,3 +173,14 @@ pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
151173
panic!("{:?} != {:?}", a, b);
152174
}
153175
}
176+
177+
#[track_caller]
178+
#[target_feature(enable = "avx512fp16")]
179+
#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
180+
pub unsafe fn assert_eq_m512h(a: __m512h, b: __m512h) {
181+
// FIXME: use `_mm512_cmp_ph_mask::<_CMP_EQ_OQ>` when it's implemented
182+
let r = _mm512_cmpeq_epi16_mask(transmute(a), transmute(b));
183+
if r != 0b11111111_11111111_11111111_11111111 {
184+
panic!("{:?} != {:?}", a, b);
185+
}
186+
}

crates/stdarch-verify/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,14 +182,17 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
182182
"__m128" => quote! { &M128 },
183183
"__m128bh" => quote! { &M128BH },
184184
"__m128d" => quote! { &M128D },
185+
"__m128h" => quote! { &M128H },
185186
"__m128i" => quote! { &M128I },
186187
"__m256" => quote! { &M256 },
187188
"__m256bh" => quote! { &M256BH },
188189
"__m256d" => quote! { &M256D },
190+
"__m256h" => quote! { &M256H },
189191
"__m256i" => quote! { &M256I },
190192
"__m512" => quote! { &M512 },
191193
"__m512bh" => quote! { &M512BH },
192194
"__m512d" => quote! { &M512D },
195+
"__m512h" => quote! { &M512H },
193196
"__m512i" => quote! { &M512I },
194197
"__mmask8" => quote! { &MMASK8 },
195198
"__mmask16" => quote! { &MMASK16 },
@@ -201,6 +204,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
201204
"_MM_PERM_ENUM" => quote! { &MM_PERM_ENUM },
202205
"bool" => quote! { &BOOL },
203206
"bf16" => quote! { &BF16 },
207+
"f16" => quote! { &F16 },
204208
"f32" => quote! { &F32 },
205209
"f64" => quote! { &F64 },
206210
"i16" => quote! { &I16 },

0 commit comments

Comments
 (0)