Skip to content

Commit 7a533ec

Browse files
authored
Add VAES intrinsics (#942)
1 parent 777efaf commit 7a533ec

File tree

2 files changed

+335
-0
lines changed

2 files changed

+335
-0
lines changed
Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
//! Vectorized AES Instructions (VAES)
2+
//!
3+
//! The intrinsics here correspond to those in the `immintrin.h` C header.
4+
//!
5+
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6+
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7+
//!
8+
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9+
10+
use crate::core_arch::x86::__m256i;
11+
use crate::core_arch::x86::__m512i;
12+
13+
#[cfg(test)]
14+
use stdarch_test::assert_instr;
15+
16+
#[allow(improper_ctypes)]
17+
extern "C" {
18+
#[link_name = "llvm.x86.aesni.aesenc.256"]
19+
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
20+
#[link_name = "llvm.x86.aesni.aesenclast.256"]
21+
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
22+
#[link_name = "llvm.x86.aesni.aesdec.256"]
23+
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
24+
#[link_name = "llvm.x86.aesni.aesdeclast.256"]
25+
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
26+
#[link_name = "llvm.x86.aesni.aesenc.512"]
27+
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
28+
#[link_name = "llvm.x86.aesni.aesenclast.512"]
29+
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
30+
#[link_name = "llvm.x86.aesni.aesdec.512"]
31+
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
32+
#[link_name = "llvm.x86.aesni.aesdeclast.512"]
33+
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
34+
}
35+
36+
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
37+
/// the corresponding 128-bit word (key) in `round_key`.
38+
///
39+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
40+
#[inline]
41+
#[target_feature(enable = "avx512vaes,avx512vl")]
42+
#[cfg_attr(test, assert_instr(vaesenc))]
43+
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
44+
aesenc_256(a, round_key)
45+
}
46+
47+
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
48+
/// the corresponding 128-bit word (key) in `round_key`.
49+
///
50+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
51+
#[inline]
52+
#[target_feature(enable = "avx512vaes,avx512vl")]
53+
#[cfg_attr(test, assert_instr(vaesenclast))]
54+
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
55+
aesenclast_256(a, round_key)
56+
}
57+
58+
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
59+
/// the corresponding 128-bit word (key) in `round_key`.
60+
///
61+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
62+
#[inline]
63+
#[target_feature(enable = "avx512vaes,avx512vl")]
64+
#[cfg_attr(test, assert_instr(vaesdec))]
65+
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
66+
aesdec_256(a, round_key)
67+
}
68+
69+
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
70+
/// the corresponding 128-bit word (key) in `round_key`.
71+
///
72+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
73+
#[inline]
74+
#[target_feature(enable = "avx512vaes,avx512vl")]
75+
#[cfg_attr(test, assert_instr(vaesdeclast))]
76+
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
77+
aesdeclast_256(a, round_key)
78+
}
79+
80+
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
81+
/// the corresponding 128-bit word (key) in `round_key`.
82+
///
83+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
84+
#[inline]
85+
#[target_feature(enable = "avx512vaes,avx512f")]
86+
#[cfg_attr(test, assert_instr(vaesenc))]
87+
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
88+
aesenc_512(a, round_key)
89+
}
90+
91+
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
92+
/// the corresponding 128-bit word (key) in `round_key`.
93+
///
94+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
95+
#[inline]
96+
#[target_feature(enable = "avx512vaes,avx512f")]
97+
#[cfg_attr(test, assert_instr(vaesenclast))]
98+
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
99+
aesenclast_512(a, round_key)
100+
}
101+
102+
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
103+
/// the corresponding 128-bit word (key) in `round_key`.
104+
///
105+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
106+
#[inline]
107+
#[target_feature(enable = "avx512vaes,avx512f")]
108+
#[cfg_attr(test, assert_instr(vaesdec))]
109+
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
110+
aesdec_512(a, round_key)
111+
}
112+
113+
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
114+
/// the corresponding 128-bit word (key) in `round_key`.
115+
///
116+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
117+
#[inline]
118+
#[target_feature(enable = "avx512vaes,avx512f")]
119+
#[cfg_attr(test, assert_instr(vaesdeclast))]
120+
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
121+
aesdeclast_512(a, round_key)
122+
}
123+
124+
#[cfg(test)]
125+
mod tests {
126+
// The constants in the tests below are just bit patterns. They should not
127+
// be interpreted as integers; signedness does not make sense for them, but
128+
// __mXXXi happens to be defined in terms of signed integers.
129+
#![allow(overflowing_literals)]
130+
131+
use stdarch_test::simd_test;
132+
133+
use crate::core_arch::x86::*;
134+
135+
// the first parts of these tests are straight ports from the AES-NI tests
136+
// the second parts directly compare the two, for inputs that are different across lanes
137+
// and "more random" than the standard test vectors
138+
// ideally we'd be using quickcheck here instead
139+
140+
#[target_feature(enable = "avx2")]
141+
unsafe fn helper_for_256_avx512vaes(
142+
linear: unsafe fn(__m128i, __m128i) -> __m128i,
143+
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
144+
) {
145+
let a = _mm256_set_epi64x(
146+
0xDCB4DB3657BF0B7D,
147+
0x18DB0601068EDD9F,
148+
0xB76B908233200DC5,
149+
0xE478235FA8E22D5E,
150+
);
151+
let k = _mm256_set_epi64x(
152+
0x672F6F105A94CEA7,
153+
0x8298B8FFCA5F829C,
154+
0xA3927047B3FB61D8,
155+
0x978093862CDE7187,
156+
);
157+
let mut a_decomp = [_mm_setzero_si128(); 2];
158+
a_decomp[0] = _mm256_extracti128_si256(a, 0);
159+
a_decomp[1] = _mm256_extracti128_si256(a, 1);
160+
let mut k_decomp = [_mm_setzero_si128(); 2];
161+
k_decomp[0] = _mm256_extracti128_si256(k, 0);
162+
k_decomp[1] = _mm256_extracti128_si256(k, 1);
163+
let r = vectorized(a, k);
164+
let mut e_decomp = [_mm_setzero_si128(); 2];
165+
for i in 0..2 {
166+
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
167+
}
168+
assert_eq_m128i(_mm256_extracti128_si256(r, 0), e_decomp[0]);
169+
assert_eq_m128i(_mm256_extracti128_si256(r, 1), e_decomp[1]);
170+
}
171+
172+
#[target_feature(enable = "sse2")]
173+
unsafe fn setup_state_key<T>(broadcast: unsafe fn(__m128i) -> T) -> (T, T) {
174+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
175+
let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
176+
let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
177+
(broadcast(a), broadcast(k))
178+
}
179+
180+
#[target_feature(enable = "avx2")]
181+
unsafe fn setup_state_key_256() -> (__m256i, __m256i) {
182+
setup_state_key(_mm256_broadcastsi128_si256)
183+
}
184+
185+
#[target_feature(enable = "avx512f")]
186+
unsafe fn setup_state_key_512() -> (__m512i, __m512i) {
187+
setup_state_key(_mm512_broadcast_i32x4)
188+
}
189+
190+
#[simd_test(enable = "avx512vaes,avx512vl")]
191+
unsafe fn test_mm256_aesdec_epi128() {
192+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
193+
let (a, k) = setup_state_key_256();
194+
let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
195+
let e = _mm256_broadcastsi128_si256(e);
196+
let r = _mm256_aesdec_epi128(a, k);
197+
assert_eq_m256i(r, e);
198+
199+
helper_for_256_avx512vaes(_mm_aesdec_si128, _mm256_aesdec_epi128);
200+
}
201+
202+
#[simd_test(enable = "avx512vaes,avx512vl")]
203+
unsafe fn test_mm256_aesdeclast_epi128() {
204+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
205+
let (a, k) = setup_state_key_256();
206+
let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
207+
let e = _mm256_broadcastsi128_si256(e);
208+
let r = _mm256_aesdeclast_epi128(a, k);
209+
assert_eq_m256i(r, e);
210+
211+
helper_for_256_avx512vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
212+
}
213+
214+
#[simd_test(enable = "avx512vaes,avx512vl")]
215+
unsafe fn test_mm256_aesenc_epi128() {
216+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
217+
// they are repeated appropriately
218+
let (a, k) = setup_state_key_256();
219+
let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
220+
let e = _mm256_broadcastsi128_si256(e);
221+
let r = _mm256_aesenc_epi128(a, k);
222+
assert_eq_m256i(r, e);
223+
224+
helper_for_256_avx512vaes(_mm_aesenc_si128, _mm256_aesenc_epi128);
225+
}
226+
227+
#[simd_test(enable = "avx512vaes,avx512vl")]
228+
unsafe fn test_mm256_aesenclast_epi128() {
229+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
230+
let (a, k) = setup_state_key_256();
231+
let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
232+
let e = _mm256_broadcastsi128_si256(e);
233+
let r = _mm256_aesenclast_epi128(a, k);
234+
assert_eq_m256i(r, e);
235+
236+
helper_for_256_avx512vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
237+
}
238+
239+
#[target_feature(enable = "avx512f")]
240+
unsafe fn helper_for_512_avx512vaes(
241+
linear: unsafe fn(__m128i, __m128i) -> __m128i,
242+
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
243+
) {
244+
let a = _mm512_set_epi64(
245+
0xDCB4DB3657BF0B7D,
246+
0x18DB0601068EDD9F,
247+
0xB76B908233200DC5,
248+
0xE478235FA8E22D5E,
249+
0xAB05CFFA2621154C,
250+
0x1171B47A186174C9,
251+
0x8C6B6C0E7595CEC9,
252+
0xBE3E7D4934E961BD,
253+
);
254+
let k = _mm512_set_epi64(
255+
0x672F6F105A94CEA7,
256+
0x8298B8FFCA5F829C,
257+
0xA3927047B3FB61D8,
258+
0x978093862CDE7187,
259+
0xB1927AB22F31D0EC,
260+
0xA9A5DA619BE4D7AF,
261+
0xCA2590F56884FDC6,
262+
0x19BE9F660038BDB5,
263+
);
264+
let mut a_decomp = [_mm_setzero_si128(); 4];
265+
a_decomp[0] = _mm512_extracti32x4_epi32(a, 0);
266+
a_decomp[1] = _mm512_extracti32x4_epi32(a, 1);
267+
a_decomp[2] = _mm512_extracti32x4_epi32(a, 2);
268+
a_decomp[3] = _mm512_extracti32x4_epi32(a, 3);
269+
let mut k_decomp = [_mm_setzero_si128(); 4];
270+
k_decomp[0] = _mm512_extracti32x4_epi32(k, 0);
271+
k_decomp[1] = _mm512_extracti32x4_epi32(k, 1);
272+
k_decomp[2] = _mm512_extracti32x4_epi32(k, 2);
273+
k_decomp[3] = _mm512_extracti32x4_epi32(k, 3);
274+
let r = vectorized(a, k);
275+
let mut e_decomp = [_mm_setzero_si128(); 4];
276+
for i in 0..4 {
277+
e_decomp[i] = linear(a_decomp[i], k_decomp[i]);
278+
}
279+
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 0), e_decomp[0]);
280+
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 1), e_decomp[1]);
281+
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 2), e_decomp[2]);
282+
assert_eq_m128i(_mm512_extracti32x4_epi32(r, 3), e_decomp[3]);
283+
}
284+
285+
#[simd_test(enable = "avx512vaes,avx512f")]
286+
unsafe fn test_mm512_aesdec_epi128() {
287+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
288+
let (a, k) = setup_state_key_512();
289+
let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
290+
let e = _mm512_broadcast_i32x4(e);
291+
let r = _mm512_aesdec_epi128(a, k);
292+
assert_eq_m512i(r, e);
293+
294+
helper_for_512_avx512vaes(_mm_aesdec_si128, _mm512_aesdec_epi128);
295+
}
296+
297+
#[simd_test(enable = "avx512vaes,avx512f")]
298+
unsafe fn test_mm512_aesdeclast_epi128() {
299+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
300+
let (a, k) = setup_state_key_512();
301+
let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
302+
let e = _mm512_broadcast_i32x4(e);
303+
let r = _mm512_aesdeclast_epi128(a, k);
304+
assert_eq_m512i(r, e);
305+
306+
helper_for_512_avx512vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
307+
}
308+
309+
#[simd_test(enable = "avx512vaes,avx512f")]
310+
unsafe fn test_mm512_aesenc_epi128() {
311+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
312+
let (a, k) = setup_state_key_512();
313+
let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
314+
let e = _mm512_broadcast_i32x4(e);
315+
let r = _mm512_aesenc_epi128(a, k);
316+
assert_eq_m512i(r, e);
317+
318+
helper_for_512_avx512vaes(_mm_aesenc_si128, _mm512_aesenc_epi128);
319+
}
320+
321+
#[simd_test(enable = "avx512vaes,avx512f")]
322+
unsafe fn test_mm512_aesenclast_epi128() {
323+
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
324+
let (a, k) = setup_state_key_512();
325+
let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
326+
let e = _mm512_broadcast_i32x4(e);
327+
let r = _mm512_aesenclast_epi128(a, k);
328+
assert_eq_m512i(r, e);
329+
330+
helper_for_512_avx512vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
331+
}
332+
}

crates/core_arch/src/x86/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,9 @@ pub use self::avx512f::*;
651651
mod avx512ifma;
652652
pub use self::avx512ifma::*;
653653

654+
mod avx512vaes;
655+
pub use self::avx512vaes::*;
656+
654657
mod bt;
655658
pub use self::bt::*;
656659

0 commit comments

Comments
 (0)