Skip to content

Commit fd5aa3c

Browse files
committed
std_detect: Add aarch64/linux/LLVM SME features
Add detection for SME features supported by LLVM and the Linux kernel.
Include commented-out hwcap fields for features supported by Linux but not by LLVM.

This commit adds feature detection for the following features:
- FEAT_SME
- FEAT_SME_F16F16
- FEAT_SME_F64F64
- FEAT_SME_F8F16
- FEAT_SME_F8F32
- FEAT_SME_FA64
- FEAT_SME_I16I64
- FEAT_SME_LUTv2
- FEAT_SME2
- FEAT_SME2p1
- FEAT_SSVE_FP8DOT2
- FEAT_SSVE_FP8DOT4
- FEAT_SSVE_FP8FMA

Linux features: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
LLVM features: llvm-project/llvm/lib/Target/AArch64/AArch64.td
1 parent 36e4bb9 commit fd5aa3c

File tree

3 files changed

+137
-0
lines changed

3 files changed

+137
-0
lines changed

crates/std_detect/src/detect/arch/aarch64.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,19 @@ features! {
7272
/// * `"fp8dot4"` - FEAT_FP8DOT4
7373
/// * `"fp8dot2"` - FEAT_FP8DOT2
7474
/// * `"wfxt"` - FEAT_WFxT
75+
/// * `"sme"` - FEAT_SME
76+
/// * `"sme-i16i64"` - FEAT_SME_I16I64
77+
/// * `"sme-f64f64"` - FEAT_SME_F64F64
78+
/// * `"sme-fa64"` - FEAT_SME_FA64
79+
/// * `"sme2"` - FEAT_SME2
80+
/// * `"sme2p1"` - FEAT_SME2p1
81+
/// * `"sme-f16f16"` - FEAT_SME_F16F16
82+
/// * `"sme-lutv2"` - FEAT_SME_LUTv2
83+
/// * `"sme-f8f16"` - FEAT_SME_F8F16
84+
/// * `"sme-f8f32"` - FEAT_SME_F8F32
85+
/// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA
86+
/// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4
87+
/// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2
7588
///
7689
/// [docs]: https://developer.arm.com/documentation/ddi0487/latest
7790
#[stable(feature = "simd_aarch64", since = "1.60.0")]
@@ -210,4 +223,30 @@ features! {
210223
/// FEAT_FP8DOT2 (F8DP2 Instructions)
211224
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt";
212225
/// FEAT_WFxT (WFET and WFIT Instructions)
226+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme";
227+
/// FEAT_SME (Scalable Matrix Extension)
228+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64";
229+
/// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
230+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64";
231+
/// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
232+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64";
233+
/// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode)
234+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2";
235+
/// FEAT_SME2 (SME Version 2)
236+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1";
237+
/// FEAT_SME2p1 (SME Version 2.1)
238+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16";
239+
/// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2)
240+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2";
241+
/// FEAT_SME_LUTv2 (LUTI4 Instruction)
242+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16";
243+
/// FEAT_SME_F8F16
244+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32";
245+
/// FEAT_SME_F8F32
246+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma";
247+
/// FEAT_SSVE_FP8FMA
248+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4";
249+
/// FEAT_SSVE_FP8DOT4
250+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2";
251+
/// FEAT_SSVE_FP8DOT2
213252
}

crates/std_detect/src/detect/os/linux/aarch64.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,13 +103,26 @@ struct AtHwcap {
103103
// afp: bool,
104104
// rpres: bool,
105105
// mte3: bool,
106+
sme: bool,
107+
smei16i64: bool,
108+
smef64f64: bool,
109+
// smei8i32: bool,
110+
// smef16f32: bool,
111+
// smeb16f32: bool,
112+
// smef32f32: bool,
113+
smefa64: bool,
106114
wfxt: bool,
107115
// ebf16: bool,
108116
// sveebf16: bool,
109117
cssc: bool,
110118
// rprfm: bool,
111119
sve2p1: bool,
120+
sme2: bool,
121+
sme2p1: bool,
122+
// smei16i32: bool,
123+
// smebi32i32: bool,
112124
smeb16b16: bool,
125+
smef16f16: bool,
113126
mops: bool,
114127
hbc: bool,
115128
sveb16b16: bool,
@@ -124,6 +137,12 @@ struct AtHwcap {
124137
f8dp2: bool,
125138
f8e4m3: bool,
126139
f8e5m2: bool,
140+
smelutv2: bool,
141+
smef8f16: bool,
142+
smef8f32: bool,
143+
smesf8fma: bool,
144+
smesf8dp4: bool,
145+
smesf8dp2: bool,
127146
}
128147

129148
impl From<auxvec::AuxVec> for AtHwcap {
@@ -187,13 +206,26 @@ impl From<auxvec::AuxVec> for AtHwcap {
187206
// afp: bit::test(auxv.hwcap2, 20),
188207
// rpres: bit::test(auxv.hwcap2, 21),
189208
// mte3: bit::test(auxv.hwcap2, 22),
209+
sme: bit::test(auxv.hwcap2, 23),
210+
smei16i64: bit::test(auxv.hwcap2, 24),
211+
smef64f64: bit::test(auxv.hwcap2, 25),
212+
// smei8i32: bit::test(auxv.hwcap2, 26),
213+
// smef16f32: bit::test(auxv.hwcap2, 27),
214+
// smeb16f32: bit::test(auxv.hwcap2, 28),
215+
// smef32f32: bit::test(auxv.hwcap2, 29),
216+
smefa64: bit::test(auxv.hwcap2, 30),
190217
wfxt: bit::test(auxv.hwcap2, 31),
191218
// ebf16: bit::test(auxv.hwcap2, 32),
192219
// sveebf16: bit::test(auxv.hwcap2, 33),
193220
cssc: bit::test(auxv.hwcap2, 34),
194221
// rprfm: bit::test(auxv.hwcap2, 35),
195222
sve2p1: bit::test(auxv.hwcap2, 36),
223+
sme2: bit::test(auxv.hwcap2, 37),
224+
sme2p1: bit::test(auxv.hwcap2, 38),
225+
// smei16i32: bit::test(auxv.hwcap2, 39),
226+
// smebi32i32: bit::test(auxv.hwcap2, 40),
196227
smeb16b16: bit::test(auxv.hwcap2, 41),
228+
smef16f16: bit::test(auxv.hwcap2, 42),
197229
mops: bit::test(auxv.hwcap2, 43),
198230
hbc: bit::test(auxv.hwcap2, 44),
199231
sveb16b16: bit::test(auxv.hwcap2, 45),
@@ -208,6 +240,12 @@ impl From<auxvec::AuxVec> for AtHwcap {
208240
f8dp2: bit::test(auxv.hwcap2, 54),
209241
f8e4m3: bit::test(auxv.hwcap2, 55),
210242
f8e5m2: bit::test(auxv.hwcap2, 56),
243+
smelutv2: bit::test(auxv.hwcap2, 57),
244+
smef8f16: bit::test(auxv.hwcap2, 58),
245+
smef8f32: bit::test(auxv.hwcap2, 59),
246+
smesf8fma: bit::test(auxv.hwcap2, 60),
247+
smesf8dp4: bit::test(auxv.hwcap2, 61),
248+
smesf8dp2: bit::test(auxv.hwcap2, 62),
211249
}
212250
}
213251
}
@@ -278,13 +316,26 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
278316
// afp: f.has("afp"),
279317
// rpres: f.has("rpres"),
280318
// mte3: f.has("mte3"),
319+
sme: f.has("sme"),
320+
smei16i64: f.has("smei16i64"),
321+
smef64f64: f.has("smef64f64"),
322+
// smei8i32: f.has("smei8i32"),
323+
// smef16f32: f.has("smef16f32"),
324+
// smeb16f32: f.has("smeb16f32"),
325+
// smef32f32: f.has("smef32f32"),
326+
smefa64: f.has("smefa64"),
281327
wfxt: f.has("wfxt"),
282328
// ebf16: f.has("ebf16"),
283329
// sveebf16: f.has("sveebf16"),
284330
cssc: f.has("cssc"),
285331
// rprfm: f.has("rprfm"),
286332
sve2p1: f.has("sve2p1"),
333+
sme2: f.has("sme2"),
334+
sme2p1: f.has("sme2p1"),
335+
// smei16i32: f.has("smei16i32"),
336+
// smebi32i32: f.has("smebi32i32"),
287337
smeb16b16: f.has("smeb16b16"),
338+
smef16f16: f.has("smef16f16"),
288339
mops: f.has("mops"),
289340
hbc: f.has("hbc"),
290341
sveb16b16: f.has("sveb16b16"),
@@ -299,6 +350,12 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
299350
f8dp2: f.has("f8dp2"),
300351
f8e4m3: f.has("f8e4m3"),
301352
f8e5m2: f.has("f8e5m2"),
353+
smelutv2: f.has("smelutv2"),
354+
smef8f16: f.has("smef8f16"),
355+
smef8f32: f.has("smef8f32"),
356+
smesf8fma: f.has("smesf8fma"),
357+
smesf8dp4: f.has("smesf8dp4"),
358+
smesf8dp2: f.has("smesf8dp2"),
302359
}
303360
}
304361
}
@@ -431,6 +488,25 @@ impl AtHwcap {
431488
enable_feature(Feature::fp8dot4, self.f8dp4);
432489
enable_feature(Feature::fp8dot2, self.f8dp2);
433490
enable_feature(Feature::wfxt, self.wfxt);
491+
enable_feature(Feature::sme, self.sme && self.bf16);
492+
enable_feature(Feature::sme_i16i64, self.smei16i64 && self.sme);
493+
enable_feature(Feature::sme_f64f64, self.smef64f64 && self.sme);
494+
// enable_feature(Feature::sme_i8i32, self.smei8i32);
495+
// enable_feature(Feature::sme_f16f32, self.smef16f32);
496+
// enable_feature(Feature::sme_b16f32, self.smeb16f32);
497+
// enable_feature(Feature::sme_f32f32, self.smef32f32);
498+
enable_feature(Feature::sme_fa64, self.smefa64 && self.sme && sve2);
499+
enable_feature(Feature::sme2, self.sme2 && self.sme);
500+
enable_feature(Feature::sme2p1, self.sme2p1 && self.sme2 && self.sme);
501+
// enable_feature(Feature::sme_i16i32, self.smei16i32);
502+
// enable_feature(Feature::sme_bi32i32, self.smebi32i32);
503+
enable_feature(Feature::sme_f16f16, self.smef16f16);
504+
enable_feature(Feature::sme_lutv2, self.smelutv2);
505+
enable_feature(Feature::sme_f8f16, self.smef8f16 && self.sme2 && self.f8cvt);
506+
enable_feature(Feature::sme_f8f32, self.smef8f32 && self.sme2 && self.f8cvt);
507+
enable_feature(Feature::ssve_fp8fma, self.smesf8fma && self.sme2);
508+
enable_feature(Feature::ssve_fp8dot4, self.smesf8dp4 && self.sme2);
509+
enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && self.sme2);
434510
}
435511
value
436512
}

crates/std_detect/tests/cpu-detection.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,28 @@ fn aarch64_linux() {
121121
println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4"));
122122
println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2"));
123123
println!("wfxt: {}", is_aarch64_feature_detected!("wfxt"));
124+
println!("sme: {}", is_aarch64_feature_detected!("sme"));
125+
println!("sme-i16i64: {}", is_aarch64_feature_detected!("sme-i16i64"));
126+
println!("sme-f64f64: {}", is_aarch64_feature_detected!("sme-f64f64"));
127+
println!("sme-fa64: {}", is_aarch64_feature_detected!("sme-fa64"));
128+
println!("sme2: {}", is_aarch64_feature_detected!("sme2"));
129+
println!("sme2p1: {}", is_aarch64_feature_detected!("sme2p1"));
130+
println!("sme-f16f16: {}", is_aarch64_feature_detected!("sme-f16f16"));
131+
println!("sme-lutv2: {}", is_aarch64_feature_detected!("sme-lutv2"));
132+
println!("sme-f8f16: {}", is_aarch64_feature_detected!("sme-f8f16"));
133+
println!("sme-f8f32: {}", is_aarch64_feature_detected!("sme-f8f32"));
134+
println!(
135+
"ssve-fp8fma: {}",
136+
is_aarch64_feature_detected!("ssve-fp8fma")
137+
);
138+
println!(
139+
"ssve-fp8dot4: {}",
140+
is_aarch64_feature_detected!("ssve-fp8dot4")
141+
);
142+
println!(
143+
"ssve-fp8dot2: {}",
144+
is_aarch64_feature_detected!("ssve-fp8dot2")
145+
);
124146
}
125147

126148
#[test]

0 commit comments

Comments
 (0)