Skip to content

Commit 36e4bb9

Browse files
committed
std_detect: Add aarch64/linux/LLVM features
Add detection for various aarch64 CPU features already supported by LLVM and Linux. This commit adds feature detection for the following features: - FEAT_CSSC - FEAT_ECV - FEAT_FAMINMAX - FEAT_FLAGM2 - FEAT_FP8 - FEAT_FP8DOT2 - FEAT_FP8DOT4 - FEAT_FP8FMA - FEAT_HBC - FEAT_LSE128 - FEAT_LUT - FEAT_MOPS - FEAT_LRCPC3 - FEAT_SVE_B16B16 - FEAT_SVE2p1 - FEAT_WFxT It also adds feature detection for FEAT_FPMR. This is somewhat of a special case: FPMR only exists as a feature in LLVM 18 and has been removed from upstream LLVM. For that reason, the intention is for it to be detectable at runtime through stdarch but not to have a corresponding compile-time Rust target feature. Linux features: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h LLVM features: llvm-project/llvm/lib/Target/AArch64/AArch64.td
1 parent 11578e7 commit 36e4bb9

File tree

5 files changed

+194
-10
lines changed

5 files changed

+194
-10
lines changed

crates/core_arch/src/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@
6464
)]
6565
#![cfg_attr(
6666
test,
67-
feature(stdarch_arm_feature_detection, stdarch_powerpc_feature_detection)
67+
feature(
68+
stdarch_arm_feature_detection,
69+
stdarch_powerpc_feature_detection
70+
)
6871
)]
6972

7073
#[cfg(test)]

crates/std_detect/src/detect/arch/aarch64.rs

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,27 @@ features! {
2222
/// * `"crc"` - FEAT_CRC
2323
/// * `"lse"` - FEAT_LSE
2424
/// * `"lse2"` - FEAT_LSE2
25+
/// * `"lse128"` - FEAT_LSE128
2526
/// * `"rdm"` - FEAT_RDM
2627
/// * `"rcpc"` - FEAT_LRCPC
2728
/// * `"rcpc2"` - FEAT_LRCPC2
29+
/// * `"rcpc3"` - FEAT_LRCPC3
2830
/// * `"dotprod"` - FEAT_DotProd
2931
/// * `"tme"` - FEAT_TME
3032
/// * `"fhm"` - FEAT_FHM
3133
/// * `"dit"` - FEAT_DIT
3234
/// * `"flagm"` - FEAT_FLAGM
35+
/// * `"flagm2"` - FEAT_FLAGM2
3336
/// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2
3437
/// * `"sb"` - FEAT_SB
3538
/// * `"paca"` - FEAT_PAuth (address authentication)
3639
/// * `"pacg"` - FEAT_PAuth (generic authentication)
3740
/// * `"dpb"` - FEAT_DPB
3841
/// * `"dpb2"` - FEAT_DPB2
42+
/// * `"sve-b16b16"` - FEAT_SVE_B16B16
3943
/// * `"sve2"` - FEAT_SVE2
40-
/// * `"sve2-aes"` - FEAT_SVE2_AES
44+
/// * `"sve2p1"` - FEAT_SVE2p1
45+
/// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto)
4146
/// * `"sve2-sm4"` - FEAT_SVE_SM4
4247
/// * `"sve2-sha3"` - FEAT_SVE_SHA3
4348
/// * `"sve2-bitperm"` - FEAT_SVE_BitPerm
@@ -55,6 +60,18 @@ features! {
5560
/// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256
5661
/// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3
5762
/// * `"sm4"` - FEAT_SM3 & FEAT_SM4
63+
/// * `"hbc"` - FEAT_HBC
64+
/// * `"mops"` - FEAT_MOPS
65+
/// * `"ecv"` - FEAT_ECV
66+
/// * `"cssc"` - FEAT_CSSC
67+
/// * `"fpmr"` - FEAT_FPMR
68+
/// * `"lut"` - FEAT_LUT
69+
/// * `"faminmax"` - FEAT_FAMINMAX
70+
/// * `"fp8"` - FEAT_FP8
71+
/// * `"fp8fma"` - FEAT_FP8FMA
72+
/// * `"fp8dot4"` - FEAT_FP8DOT4
73+
/// * `"fp8dot2"` - FEAT_FP8DOT2
74+
/// * `"wfxt"` - FEAT_WFxT
5875
///
5976
/// [docs]: https://developer.arm.com/documentation/ddi0487/latest
6077
#[stable(feature = "simd_aarch64", since = "1.60.0")]
@@ -67,6 +84,14 @@ features! {
6784
@NO_RUNTIME_DETECTION: "v8.5a";
6885
@NO_RUNTIME_DETECTION: "v8.6a";
6986
@NO_RUNTIME_DETECTION: "v8.7a";
87+
@NO_RUNTIME_DETECTION: "v8.8a";
88+
@NO_RUNTIME_DETECTION: "v8.9a";
89+
@NO_RUNTIME_DETECTION: "v9.1a";
90+
@NO_RUNTIME_DETECTION: "v9.2a";
91+
@NO_RUNTIME_DETECTION: "v9.3a";
92+
@NO_RUNTIME_DETECTION: "v9.4a";
93+
@NO_RUNTIME_DETECTION: "v9.5a";
94+
@NO_RUNTIME_DETECTION: "v9a";
7095
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon";
7196
/// FEAT_AdvSIMD (Advanced SIMD/NEON)
7297
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pmull: "pmull";
@@ -85,12 +110,16 @@ features! {
85110
/// FEAT_LSE (Large System Extension - atomics)
86111
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2";
87112
/// FEAT_LSE2 (unaligned and register-pair atomics)
113+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128";
114+
/// FEAT_LSE128 (128-bit atomics)
88115
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm";
89116
/// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM)
90117
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc";
91118
/// FEAT_LRCPC (Release consistent Processor consistent)
92119
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2";
93120
/// FEAT_LRCPC2 (RCPC with immediate offsets)
121+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3";
122+
/// FEAT_LRCPC3 (RCPC Instructions v3)
94123
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod";
95124
/// FEAT_DotProd (Vector Dot-Product - ASIMDDP)
96125
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme";
@@ -101,6 +130,8 @@ features! {
101130
/// FEAT_DIT (Data Independent Timing instructions)
102131
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm";
103132
/// FEAT_FLAGM (flag manipulation instructions)
133+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2";
134+
/// FEAT_FLAGM2 (enhancements to flag manipulation instructions)
104135
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs";
105136
/// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe)
106137
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb";
@@ -115,14 +146,18 @@ features! {
115146
/// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence)
116147
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2";
117148
/// FEAT_SVE2 (Scalable Vector Extension 2)
149+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1";
150+
/// FEAT_SVE2p1 (Scalable Vector Extension 2.1)
118151
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes";
119-
/// FEAT_SVE_AES (SVE2 AES crypto)
152+
/// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto)
120153
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4";
121154
/// FEAT_SVE_SM4 (SVE2 SM4 crypto)
122155
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3";
123156
/// FEAT_SVE_SHA3 (SVE2 SHA3 crypto)
124157
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm";
125158
/// FEAT_SVE_BitPerm (SVE2 bit permutation instructions)
159+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16";
160+
/// FEAT_SVE_B16B16 (non-widening BFloat16 instructions for SVE or SME)
126161
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts";
127162
/// FEAT_FRINTTS (float to integer rounding instructions)
128163
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm";
@@ -151,4 +186,28 @@ features! {
151186
/// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions)
152187
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4";
153188
/// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions)
189+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc";
190+
/// FEAT_HBC (Hinted conditional branches)
191+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops";
192+
/// FEAT_MOPS (Standardization of memory operations)
193+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv";
194+
/// FEAT_ECV (Enhanced Counter Virtualization)
195+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc";
196+
/// FEAT_CSSC (Common Short Sequence Compression instructions)
197+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr";
198+
/// FEAT_FPMR (Special-purpose AArch64-FPMR register)
199+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut";
200+
/// FEAT_LUT (Lookup Table Instructions)
201+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax";
202+
/// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions)
203+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8";
204+
/// FEAT_FP8 (F8CVT Instructions)
205+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma";
206+
/// FEAT_FP8FMA (F8FMA Instructions)
207+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4";
208+
/// FEAT_FP8DOT4 (F8DP4 Instructions)
209+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2";
210+
/// FEAT_FP8DOT2 (F8DP2 Instructions)
211+
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt";
212+
/// FEAT_WFxT (WFET and WFIT Instructions)
154213
}

crates/std_detect/src/detect/os/linux/aarch64.rs

Lines changed: 110 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,11 @@ struct AtHwcap {
8383
dcpodp: bool,
8484
sve2: bool,
8585
sveaes: bool,
86-
// svepmull: No LLVM support.
86+
svepmull: bool,
8787
svebitperm: bool,
8888
svesha3: bool,
8989
svesm4: bool,
90-
// flagm2: No LLVM support.
90+
flagm2: bool,
9191
frint: bool,
9292
// svei8mm: See i8mm feature.
9393
svef32mm: bool,
@@ -99,6 +99,31 @@ struct AtHwcap {
9999
rng: bool,
100100
bti: bool,
101101
mte: bool,
102+
ecv: bool,
103+
// afp: bool,
104+
// rpres: bool,
105+
// mte3: bool,
106+
wfxt: bool,
107+
// ebf16: bool,
108+
// sveebf16: bool,
109+
cssc: bool,
110+
// rprfm: bool,
111+
sve2p1: bool,
112+
smeb16b16: bool,
113+
mops: bool,
114+
hbc: bool,
115+
sveb16b16: bool,
116+
lrcpc3: bool,
117+
lse128: bool,
118+
fpmr: bool,
119+
lut: bool,
120+
faminmax: bool,
121+
f8cvt: bool,
122+
f8fma: bool,
123+
f8dp4: bool,
124+
f8dp2: bool,
125+
f8e4m3: bool,
126+
f8e5m2: bool,
102127
}
103128

104129
impl From<auxvec::AuxVec> for AtHwcap {
@@ -137,14 +162,16 @@ impl From<auxvec::AuxVec> for AtHwcap {
137162
sb: bit::test(auxv.hwcap, 29),
138163
paca: bit::test(auxv.hwcap, 30),
139164
pacg: bit::test(auxv.hwcap, 31),
165+
166+
// AT_HWCAP2
140167
dcpodp: bit::test(auxv.hwcap2, 0),
141168
sve2: bit::test(auxv.hwcap2, 1),
142169
sveaes: bit::test(auxv.hwcap2, 2),
143-
// svepmull: bit::test(auxv.hwcap2, 3),
170+
svepmull: bit::test(auxv.hwcap2, 3),
144171
svebitperm: bit::test(auxv.hwcap2, 4),
145172
svesha3: bit::test(auxv.hwcap2, 5),
146173
svesm4: bit::test(auxv.hwcap2, 6),
147-
// flagm2: bit::test(auxv.hwcap2, 7),
174+
flagm2: bit::test(auxv.hwcap2, 7),
148175
frint: bit::test(auxv.hwcap2, 8),
149176
// svei8mm: bit::test(auxv.hwcap2, 9),
150177
svef32mm: bit::test(auxv.hwcap2, 10),
@@ -156,6 +183,31 @@ impl From<auxvec::AuxVec> for AtHwcap {
156183
rng: bit::test(auxv.hwcap2, 16),
157184
bti: bit::test(auxv.hwcap2, 17),
158185
mte: bit::test(auxv.hwcap2, 18),
186+
ecv: bit::test(auxv.hwcap2, 19),
187+
// afp: bit::test(auxv.hwcap2, 20),
188+
// rpres: bit::test(auxv.hwcap2, 21),
189+
// mte3: bit::test(auxv.hwcap2, 22),
190+
wfxt: bit::test(auxv.hwcap2, 31),
191+
// ebf16: bit::test(auxv.hwcap2, 32),
192+
// sveebf16: bit::test(auxv.hwcap2, 33),
193+
cssc: bit::test(auxv.hwcap2, 34),
194+
// rprfm: bit::test(auxv.hwcap2, 35),
195+
sve2p1: bit::test(auxv.hwcap2, 36),
196+
smeb16b16: bit::test(auxv.hwcap2, 41),
197+
mops: bit::test(auxv.hwcap2, 43),
198+
hbc: bit::test(auxv.hwcap2, 44),
199+
sveb16b16: bit::test(auxv.hwcap2, 45),
200+
lrcpc3: bit::test(auxv.hwcap2, 46),
201+
lse128: bit::test(auxv.hwcap2, 47),
202+
fpmr: bit::test(auxv.hwcap2, 48),
203+
lut: bit::test(auxv.hwcap2, 49),
204+
faminmax: bit::test(auxv.hwcap2, 50),
205+
f8cvt: bit::test(auxv.hwcap2, 51),
206+
f8fma: bit::test(auxv.hwcap2, 52),
207+
f8dp4: bit::test(auxv.hwcap2, 53),
208+
f8dp2: bit::test(auxv.hwcap2, 54),
209+
f8e4m3: bit::test(auxv.hwcap2, 55),
210+
f8e5m2: bit::test(auxv.hwcap2, 56),
159211
}
160212
}
161213
}
@@ -201,14 +253,16 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
201253
sb: f.has("sb"),
202254
paca: f.has("paca"),
203255
pacg: f.has("pacg"),
256+
257+
// AT_HWCAP2
204258
dcpodp: f.has("dcpodp"),
205259
sve2: f.has("sve2"),
206260
sveaes: f.has("sveaes"),
207-
// svepmull: f.has("svepmull"),
261+
svepmull: f.has("svepmull"),
208262
svebitperm: f.has("svebitperm"),
209263
svesha3: f.has("svesha3"),
210264
svesm4: f.has("svesm4"),
211-
// flagm2: f.has("flagm2"),
265+
flagm2: f.has("flagm2"),
212266
frint: f.has("frint"),
213267
// svei8mm: f.has("svei8mm"),
214268
svef32mm: f.has("svef32mm"),
@@ -220,6 +274,31 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
220274
rng: f.has("rng"),
221275
bti: f.has("bti"),
222276
mte: f.has("mte"),
277+
ecv: f.has("ecv"),
278+
// afp: f.has("afp"),
279+
// rpres: f.has("rpres"),
280+
// mte3: f.has("mte3"),
281+
wfxt: f.has("wfxt"),
282+
// ebf16: f.has("ebf16"),
283+
// sveebf16: f.has("sveebf16"),
284+
cssc: f.has("cssc"),
285+
// rprfm: f.has("rprfm"),
286+
sve2p1: f.has("sve2p1"),
287+
smeb16b16: f.has("smeb16b16"),
288+
mops: f.has("mops"),
289+
hbc: f.has("hbc"),
290+
sveb16b16: f.has("sveb16b16"),
291+
lrcpc3: f.has("lrcpc3"),
292+
lse128: f.has("lse128"),
293+
fpmr: f.has("fpmr"),
294+
lut: f.has("lut"),
295+
faminmax: f.has("faminmax"),
296+
f8cvt: f.has("f8cvt"),
297+
f8fma: f.has("f8fma"),
298+
f8dp4: f.has("f8dp4"),
299+
f8dp2: f.has("f8dp2"),
300+
f8e4m3: f.has("f8e4m3"),
301+
f8e5m2: f.has("f8e5m2"),
223302
}
224303
}
225304
}
@@ -267,11 +346,14 @@ impl AtHwcap {
267346
enable_feature(Feature::crc, self.crc32);
268347
enable_feature(Feature::lse, self.atomics);
269348
enable_feature(Feature::lse2, self.uscat);
349+
enable_feature(Feature::lse128, self.lse128);
270350
enable_feature(Feature::rcpc, self.lrcpc);
271351
// RCPC2 (rcpc-immo in LLVM) requires RCPC support
272352
enable_feature(Feature::rcpc2, self.ilrcpc && self.lrcpc);
353+
enable_feature(Feature::rcpc3, self.lrcpc3);
273354
enable_feature(Feature::dit, self.dit);
274355
enable_feature(Feature::flagm, self.flagm);
356+
enable_feature(Feature::flagm2, self.flagm2);
275357
enable_feature(Feature::ssbs, self.ssbs);
276358
enable_feature(Feature::sb, self.sb);
277359
enable_feature(Feature::paca, self.paca);
@@ -317,8 +399,12 @@ impl AtHwcap {
317399
// SVE2 requires SVE
318400
let sve2 = self.sve2 && self.sve && asimd;
319401
enable_feature(Feature::sve2, sve2);
402+
enable_feature(Feature::sve2p1, self.sve2p1);
320403
// SVE2 extensions require SVE2 and crypto features
321-
enable_feature(Feature::sve2_aes, self.sveaes && sve2 && self.aes);
404+
enable_feature(
405+
Feature::sve2_aes,
406+
self.sveaes && self.svepmull && sve2 && self.aes,
407+
);
322408
enable_feature(
323409
Feature::sve2_sm4,
324410
self.svesm4 && sve2 && self.sm3 && self.sm4,
@@ -328,6 +414,23 @@ impl AtHwcap {
328414
self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2,
329415
);
330416
enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2);
417+
// SVE_B16B16 can be implemented either for SVE or SME
418+
enable_feature(
419+
Feature::sve_b16b16,
420+
self.bf16 && (self.sveb16b16 || self.smeb16b16),
421+
);
422+
enable_feature(Feature::hbc, self.hbc);
423+
enable_feature(Feature::mops, self.mops);
424+
enable_feature(Feature::ecv, self.ecv);
425+
enable_feature(Feature::lut, self.lut);
426+
enable_feature(Feature::cssc, self.cssc);
427+
enable_feature(Feature::fpmr, self.fpmr);
428+
enable_feature(Feature::faminmax, self.faminmax);
429+
enable_feature(Feature::fp8, self.f8cvt);
430+
enable_feature(Feature::fp8fma, self.f8fma);
431+
enable_feature(Feature::fp8dot4, self.f8dp4);
432+
enable_feature(Feature::fp8dot2, self.f8dp2);
433+
enable_feature(Feature::wfxt, self.wfxt);
331434
}
332435
value
333436
}

0 commit comments

Comments
 (0)