diff --git a/crates/core_arch/avx512bw.md b/crates/core_arch/avx512bw.md
index 7484e8792b..367cb5de2a 100644
--- a/crates/core_arch/avx512bw.md
+++ b/crates/core_arch/avx512bw.md
@@ -1,34 +1,34 @@
["AVX512BW"]
* [x] [`_mm512_loadu_epi16`]
- * [_] [`_mm512_mask_loadu_epi16`]
- * [_] [`_mm512_maskz_loadu_epi16`]
+ * [_] [`_mm512_mask_loadu_epi16`] //need i1
+ * [_] [`_mm512_maskz_loadu_epi16`] //need i1
* [x] [`_mm_loadu_epi16`]
- * [_] [`_mm_mask_loadu_epi16`]
- * [_] [`_mm_maskz_loadu_epi16`]
+ * [_] [`_mm_mask_loadu_epi16`] //need i1
+ * [_] [`_mm_maskz_loadu_epi16`] //need i1
* [x] [`_mm256_loadu_epi16`]
- * [_] [`_mm256_mask_loadu_epi16`]
- * [_] [`_mm256_maskz_loadu_epi16`]
+ * [_] [`_mm256_mask_loadu_epi16`] //need i1
+ * [_] [`_mm256_maskz_loadu_epi16`] //need i1
* [x] [`_mm512_loadu_epi8`]
- * [_] [`_mm512_mask_loadu_epi8`]
- * [_] [`_mm512_maskz_loadu_epi8`]
+ * [_] [`_mm512_mask_loadu_epi8`] //need i1
+ * [_] [`_mm512_maskz_loadu_epi8`] //need i1
* [x] [`_mm_loadu_epi8`]
- * [_] [`_mm_mask_loadu_epi8`]
- * [_] [`_mm_maskz_loadu_epi8`]
+ * [_] [`_mm_mask_loadu_epi8`] //need i1
+ * [_] [`_mm_maskz_loadu_epi8`] //need i1
* [x] [`_mm256_loadu_epi8`]
- * [_] [`_mm256_mask_loadu_epi8`]
- * [_] [`_mm256_maskz_loadu_epi8`]
+ * [_] [`_mm256_mask_loadu_epi8`] //need i1
+ * [_] [`_mm256_maskz_loadu_epi8`] //need i1
* [_] [`_mm512_mask_storeu_epi16`]
* [x] [`_mm512_storeu_epi16`]
- * [_] [`_mm_mask_storeu_epi16`]
+ * [_] [`_mm_mask_storeu_epi16`] //need i1
* [x] [`_mm_storeu_epi16`]
- * [_] [`_mm256_mask_storeu_epi16`]
+ * [_] [`_mm256_mask_storeu_epi16`] //need i1
* [x] [`_mm256_storeu_epi16`]
- * [_] [`_mm512_mask_storeu_epi8`]
+ * [_] [`_mm512_mask_storeu_epi8`] //need i1
* [x] [`_mm512_storeu_epi8`]
- * [_] [`_mm_mask_storeu_epi8`]
+ * [_] [`_mm_mask_storeu_epi8`] //need i1
* [x] [`_mm_storeu_epi8`]
- * [_] [`_mm256_mask_storeu_epi8`]
+ * [_] [`_mm256_mask_storeu_epi8`] //need i1
* [x] [`_mm256_storeu_epi8`]
* [x] [`_mm512_abs_epi16`]
* [x] [`_mm512_mask_abs_epi16`]
diff --git a/crates/core_arch/avx512f.md b/crates/core_arch/avx512f.md
index e61f25507c..1ad80147cf 100644
--- a/crates/core_arch/avx512f.md
+++ b/crates/core_arch/avx512f.md
@@ -1,5 +1,5 @@
["AVX512F"]
-
+
* [x] [`_mm512_abs_epi32`]
* [x] [`_mm512_mask_abs_epi32`]
* [x] [`_mm512_maskz_abs_epi32`]
@@ -2025,165 +2025,314 @@
* [x] [`_mm_maskz_cvtepi8_epi32`]
* [x] [`_mm256_mask_cvtepi8_epi32`]
* [x] [`_mm256_maskz_cvtepi8_epi32`]
-
- * [x] [`_mm512_mask_cvtsepi64_epi32`]
- * [x] [`_mm512_mask_cvtsepi64_epi8`]
- * [ ] [`_mm512_mask_cvtsepi64_storeu_epi16`]
- * [ ] [`_mm512_mask_cvtsepi64_storeu_epi32`]
- * [ ] [`_mm512_mask_cvtsepi64_storeu_epi8`]
- * [x] [`_mm512_cvt_roundepi32_ps`]
- * [x] [`_mm512_cvt_roundepu32_ps`]
- * [x] [`_mm512_cvt_roundpd_epi32`]
- * [x] [`_mm512_cvt_roundpd_epu32`]
- * [x] [`_mm512_cvt_roundpd_ps`]
- * [x] [`_mm512_cvt_roundph_ps`]
- * [x] [`_mm512_cvt_roundps_epi32`]
- * [x] [`_mm512_cvt_roundps_epu32`]
- * [x] [`_mm512_cvt_roundps_pd`]
-
- * [x] [`_mm512_mask_cvtsepi64_epi16`]
* [x] [`_mm512_cvtepi8_epi64`]
+ * [x] [`_mm512_mask_cvtepi8_epi64`]
+ * [x] [`_mm512_maskz_cvtepi8_epi64`]
+ * [x] [`_mm_mask_cvtepi8_epi64`]
+ * [x] [`_mm_maskz_cvtepi8_epi64`]
+ * [x] [`_mm256_mask_cvtepi8_epi64`]
+ * [x] [`_mm256_maskz_cvtepi8_epi64`]
* [x] [`_mm512_cvtepu16_epi32`]
+ * [x] [`_mm512_mask_cvtepu16_epi32`]
+ * [x] [`_mm512_maskz_cvtepu16_epi32`]
+ * [x] [`_mm_mask_cvtepu16_epi32`]
+ * [x] [`_mm_maskz_cvtepu16_epi32`]
+ * [x] [`_mm256_mask_cvtepu16_epi32`]
+ * [x] [`_mm256_maskz_cvtepu16_epi32`]
* [x] [`_mm512_cvtepu16_epi64`]
+ * [x] [`_mm512_mask_cvtepu16_epi64`]
+ * [x] [`_mm512_maskz_cvtepu16_epi64`]
+ * [x] [`_mm_mask_cvtepu16_epi64`]
+ * [x] [`_mm_maskz_cvtepu16_epi64`]
+ * [x] [`_mm256_mask_cvtepu16_epi64`]
+ * [x] [`_mm256_maskz_cvtepu16_epi64`]
* [x] [`_mm512_cvtepu32_epi64`]
- * [x] [`_mm512_cvtepu32_pd`]
+ * [x] [`_mm512_mask_cvtepu32_epi64`]
+ * [x] [`_mm512_maskz_cvtepu32_epi64`]
+ * [x] [`_mm_mask_cvtepu32_epi64`]
+ * [x] [`_mm_maskz_cvtepu32_epi64`]
+ * [x] [`_mm256_mask_cvtepu32_epi64`]
+ * [x] [`_mm256_maskz_cvtepu32_epi64`]
* [x] [`_mm512_cvtepu32_ps`]
+ * [x] [`_mm512_mask_cvtepu32_ps`]
+ * [x] [`_mm512_maskz_cvtepu32_ps`]
+ * [x] [`_mm512_cvtepu32_pd`]
+ * [x] [`_mm512_mask_cvtepu32_pd`]
+ * [x] [`_mm512_maskz_cvtepu32_pd`]
+ * [x] [`_mm_cvtepu32_pd`]
+ * [x] [`_mm_mask_cvtepu32_pd`]
+ * [x] [`_mm_maskz_cvtepu32_pd`]
+ * [x] [`_mm256_cvtepu32_pd`]
+ * [x] [`_mm256_mask_cvtepu32_pd`]
+ * [x] [`_mm256_maskz_cvtepu32_pd`]
* [x] [`_mm512_cvtepu32lo_pd`]
+ * [x] [`_mm512_mask_cvtepu32lo_pd`]
* [x] [`_mm512_cvtepu8_epi32`]
+ * [x] [`_mm512_mask_cvtepu8_epi32`]
+ * [x] [`_mm512_maskz_cvtepu8_epi32`]
+ * [x] [`_mm_mask_cvtepu8_epi32`]
+ * [x] [`_mm_maskz_cvtepu8_epi32`]
+ * [x] [`_mm256_mask_cvtepu8_epi32`]
+ * [x] [`_mm256_maskz_cvtepu8_epi32`]
* [x] [`_mm512_cvtepu8_epi64`]
+ * [x] [`_mm512_mask_cvtepu8_epi64`]
+ * [x] [`_mm512_maskz_cvtepu8_epi64`]
+ * [x] [`_mm_mask_cvtepu8_epi64`]
+ * [x] [`_mm_maskz_cvtepu8_epi64`]
+ * [x] [`_mm256_mask_cvtepu8_epi64`]
+ * [x] [`_mm256_maskz_cvtepu8_epi64`]
* [x] [`_mm512_cvtpd_epi32`]
+ * [x] [`_mm512_mask_cvtpd_epi32`]
+ * [x] [`_mm512_maskz_cvtpd_epi32`]
+ * [x] [`_mm_mask_cvtpd_epi32`]
+ * [x] [`_mm_maskz_cvtpd_epi32`]
+ * [x] [`_mm256_mask_cvtpd_epi32`]
+ * [x] [`_mm256_maskz_cvtpd_epi32`]
* [x] [`_mm512_cvtpd_epu32`]
+ * [x] [`_mm512_mask_cvtpd_epu32`]
+ * [x] [`_mm512_maskz_cvtpd_epu32`]
+ * [x] [`_mm_cvtpd_epu32`]
+ * [x] [`_mm_mask_cvtpd_epu32`]
+ * [x] [`_mm_maskz_cvtpd_epu32`]
+ * [x] [`_mm256_cvtpd_epu32`]
+ * [x] [`_mm256_mask_cvtpd_epu32`]
+ * [x] [`_mm256_maskz_cvtpd_epu32`]
* [x] [`_mm512_cvtpd_ps`]
+ * [x] [`_mm512_mask_cvtpd_ps`]
+ * [x] [`_mm512_maskz_cvtpd_ps`]
+ * [x] [`_mm_mask_cvtpd_ps`]
+ * [x] [`_mm_maskz_cvtpd_ps`]
+ * [x] [`_mm256_mask_cvtpd_ps`]
+ * [x] [`_mm256_maskz_cvtpd_ps`]
* [x] [`_mm512_cvtpd_pslo`]
+ * [x] [`_mm512_mask_cvtpd_pslo`]
* [x] [`_mm512_cvtph_ps`]
+ * [x] [`_mm512_mask_cvtph_ps`]
+ * [x] [`_mm512_maskz_cvtph_ps`]
+ * [x] [`_mm_mask_cvtph_ps`]
+ * [x] [`_mm_maskz_cvtph_ps`]
+ * [x] [`_mm256_mask_cvtph_ps`]
+ * [x] [`_mm256_maskz_cvtph_ps`]
* [x] [`_mm512_cvtps_epi32`]
+ * [x] [`_mm512_mask_cvtps_epi32`]
+ * [x] [`_mm512_maskz_cvtps_epi32`]
+ * [x] [`_mm_mask_cvtps_epi32`]
+ * [x] [`_mm_maskz_cvtps_epi32`]
+ * [x] [`_mm256_mask_cvtps_epi32`]
+ * [x] [`_mm256_maskz_cvtps_epi32`]
* [x] [`_mm512_cvtps_epu32`]
+ * [x] [`_mm512_mask_cvtps_epu32`]
+ * [x] [`_mm512_maskz_cvtps_epu32`]
+ * [x] [`_mm_cvtps_epu32`]
+ * [x] [`_mm_mask_cvtps_epu32`]
+ * [x] [`_mm_maskz_cvtps_epu32`]
+ * [x] [`_mm256_cvtps_epu32`]
+ * [x] [`_mm256_mask_cvtps_epu32`]
+ * [x] [`_mm256_maskz_cvtps_epu32`]
* [x] [`_mm512_cvtps_pd`]
+ * [x] [`_mm512_mask_cvtps_pd`]
+ * [x] [`_mm512_maskz_cvtps_pd`]
* [x] [`_mm512_cvtps_ph`]
+ * [x] [`_mm512_mask_cvtps_ph`]
+ * [x] [`_mm512_maskz_cvtps_ph`]
+ * [x] [`_mm_mask_cvtps_ph`]
+ * [x] [`_mm_maskz_cvtps_ph`]
+ * [x] [`_mm256_mask_cvtps_ph`]
+ * [x] [`_mm256_maskz_cvtps_ph`]
* [x] [`_mm512_cvtpslo_pd`]
+ * [x] [`_mm512_mask_cvtpslo_pd`]
* [x] [`_mm512_cvtsepi32_epi16`]
+ * [x] [`_mm512_mask_cvtsepi32_epi16`]
+ * [x] [`_mm512_maskz_cvtsepi32_epi16`]
+ * [x] [`_mm_cvtsepi32_epi16`]
+ * [x] [`_mm_mask_cvtsepi32_epi16`]
+ * [x] [`_mm_maskz_cvtsepi32_epi16`]
+ * [x] [`_mm256_cvtsepi32_epi16`]
+ * [x] [`_mm256_mask_cvtsepi32_epi16`]
+ * [x] [`_mm256_maskz_cvtsepi32_epi16`]
* [x] [`_mm512_cvtsepi32_epi8`]
+ * [x] [`_mm512_mask_cvtsepi32_epi8`]
+ * [x] [`_mm512_maskz_cvtsepi32_epi8`]
+ * [x] [`_mm_cvtsepi32_epi8`]
+ * [x] [`_mm_mask_cvtsepi32_epi8`]
+ * [x] [`_mm_maskz_cvtsepi32_epi8`]
+ * [x] [`_mm256_cvtsepi32_epi8`]
+ * [x] [`_mm256_mask_cvtsepi32_epi8`]
+ * [x] [`_mm256_maskz_cvtsepi32_epi8`]
+ * [x] [`_mm512_mask_cvtsepi32_storeu_epi16`]
+ * [x] [`_mm_mask_cvtsepi32_storeu_epi16`]
+ * [x] [`_mm256_mask_cvtsepi32_storeu_epi16`]
+ * [x] [`_mm512_mask_cvtsepi32_storeu_epi8`]
+ * [x] [`_mm_mask_cvtsepi32_storeu_epi8`]
+ * [x] [`_mm256_mask_cvtsepi32_storeu_epi8`]
* [x] [`_mm512_cvtsepi64_epi16`]
+ * [x] [`_mm512_mask_cvtsepi64_epi16`]
+ * [x] [`_mm512_maskz_cvtsepi64_epi16`]
+ * [x] [`_mm_cvtsepi64_epi16`]
+ * [x] [`_mm_mask_cvtsepi64_epi16`]
+ * [x] [`_mm_maskz_cvtsepi64_epi16`]
+ * [x] [`_mm256_cvtsepi64_epi16`]
+ * [x] [`_mm256_mask_cvtsepi64_epi16`]
+ * [x] [`_mm256_maskz_cvtsepi64_epi16`]
* [x] [`_mm512_cvtsepi64_epi32`]
+ * [x] [`_mm512_mask_cvtsepi64_epi32`]
+ * [x] [`_mm512_maskz_cvtsepi64_epi32`]
+ * [x] [`_mm_cvtsepi64_epi32`]
+ * [x] [`_mm_mask_cvtsepi64_epi32`]
+ * [x] [`_mm_maskz_cvtsepi64_epi32`]
+ * [x] [`_mm256_cvtsepi64_epi32`]
+ * [x] [`_mm256_mask_cvtsepi64_epi32`]
+ * [x] [`_mm256_maskz_cvtsepi64_epi32`]
* [x] [`_mm512_cvtsepi64_epi8`]
- * [x] [`_mm512_cvtt_roundpd_epi32`]
- * [x] [`_mm512_cvtt_roundpd_epu32`]
- * [x] [`_mm512_cvtt_roundps_epi32`]
- * [x] [`_mm512_cvtt_roundps_epu32`]
- * [x] [`_mm512_cvttpd_epi32`]
- * [x] [`_mm512_cvttpd_epu32`]
- * [x] [`_mm512_cvttps_epi32`]
- * [x] [`_mm512_cvttps_epu32`]
+ * [x] [`_mm512_mask_cvtsepi64_epi8`]
+ * [x] [`_mm512_maskz_cvtsepi64_epi8`]
+ * [x] [`_mm_cvtsepi64_epi8`]
+ * [x] [`_mm_mask_cvtsepi64_epi8`]
+ * [x] [`_mm_maskz_cvtsepi64_epi8`]
+ * [x] [`_mm256_cvtsepi64_epi8`]
+ * [x] [`_mm256_mask_cvtsepi64_epi8`]
+ * [x] [`_mm256_maskz_cvtsepi64_epi8`]
+ * [x] [`_mm512_mask_cvtsepi64_storeu_epi16`]
+ * [x] [`_mm_mask_cvtsepi64_storeu_epi16`]
+ * [x] [`_mm256_mask_cvtsepi64_storeu_epi16`]
+ * [x] [`_mm512_mask_cvtsepi64_storeu_epi32`]
+ * [x] [`_mm_mask_cvtsepi64_storeu_epi32`]
+ * [x] [`_mm256_mask_cvtsepi64_storeu_epi32`]
+ * [x] [`_mm512_mask_cvtsepi64_storeu_epi8`]
+ * [x] [`_mm_mask_cvtsepi64_storeu_epi8`]
+ * [x] [`_mm256_mask_cvtsepi64_storeu_epi8`]
* [x] [`_mm512_cvtusepi32_epi16`]
+ * [x] [`_mm512_mask_cvtusepi32_epi16`]
+ * [x] [`_mm512_maskz_cvtusepi32_epi16`]
+ * [x] [`_mm_cvtusepi32_epi16`]
+ * [x] [`_mm_mask_cvtusepi32_epi16`]
+ * [x] [`_mm_maskz_cvtusepi32_epi16`]
+ * [x] [`_mm256_cvtusepi32_epi16`]
+ * [x] [`_mm256_mask_cvtusepi32_epi16`]
+ * [x] [`_mm256_maskz_cvtusepi32_epi16`]
* [x] [`_mm512_cvtusepi32_epi8`]
+ * [x] [`_mm512_mask_cvtusepi32_epi8`]
+ * [x] [`_mm512_maskz_cvtusepi32_epi8`]
+ * [x] [`_mm_cvtusepi32_epi8`]
+ * [x] [`_mm_mask_cvtusepi32_epi8`]
+ * [x] [`_mm_maskz_cvtusepi32_epi8`]
+ * [x] [`_mm256_cvtusepi32_epi8`]
+ * [x] [`_mm256_mask_cvtusepi32_epi8`]
+ * [x] [`_mm256_maskz_cvtusepi32_epi8`]
+ * [x] [`_mm512_mask_cvtusepi32_storeu_epi16`]
+ * [x] [`_mm_mask_cvtusepi32_storeu_epi16`]
+ * [x] [`_mm256_mask_cvtusepi32_storeu_epi16`]
+ * [x] [`_mm512_mask_cvtusepi32_storeu_epi8`]
+ * [x] [`_mm_mask_cvtusepi32_storeu_epi8`]
+ * [x] [`_mm256_mask_cvtusepi32_storeu_epi8`]
* [x] [`_mm512_cvtusepi64_epi16`]
+ * [x] [`_mm512_mask_cvtusepi64_epi16`]
+ * [x] [`_mm512_maskz_cvtusepi64_epi16`]
+ * [x] [`_mm_cvtusepi64_epi16`]
+ * [x] [`_mm_mask_cvtusepi64_epi16`]
+ * [x] [`_mm_maskz_cvtusepi64_epi16`]
+ * [x] [`_mm256_cvtusepi64_epi16`]
+ * [x] [`_mm256_mask_cvtusepi64_epi16`]
+ * [x] [`_mm256_maskz_cvtusepi64_epi16`]
* [x] [`_mm512_cvtusepi64_epi32`]
+ * [x] [`_mm512_mask_cvtusepi64_epi32`]
+ * [x] [`_mm512_maskz_cvtusepi64_epi32`]
+ * [x] [`_mm_cvtusepi64_epi32`]
+ * [x] [`_mm_mask_cvtusepi64_epi32`]
+ * [x] [`_mm_maskz_cvtusepi64_epi32`]
+ * [x] [`_mm256_cvtusepi64_epi32`]
+ * [x] [`_mm256_mask_cvtusepi64_epi32`]
+ * [x] [`_mm256_maskz_cvtusepi64_epi32`]
* [x] [`_mm512_cvtusepi64_epi8`]
- * [x] [`_mm512_int2mask`]
- * [x] [`_mm512_kand`]
- * [x] [`_mm512_kandn`]
- * [x] [`_mm512_kmov`]
- * [x] [`_mm512_knot`]
- * [x] [`_mm512_kor`]
- * [x] [`_mm512_kortestc`]
- * [ ] [`_mm512_kortestz`]
- * [x] [`_mm512_kunpackb`]
- * [x] [`_mm512_kxnor`]
- * [x] [`_mm512_kxor`]
- * [x] [`_mm512_mask2int`]
- * [x] [`_mm512_mask_cvt_roundepi32_ps`]
- * [x] [`_mm512_mask_cvt_roundepu32_ps`]
- * [x] [`_mm512_mask_cvt_roundpd_epi32`]
- * [x] [`_mm512_mask_cvt_roundpd_epu32`]
- * [x] [`_mm512_mask_cvt_roundpd_ps`]
- * [x] [`_mm512_mask_cvt_roundph_ps`]
- * [x] [`_mm512_mask_cvt_roundps_epi32`]
- * [x] [`_mm512_mask_cvt_roundps_epu32`]
- * [x] [`_mm512_mask_cvt_roundps_pd`]
- * [x] [`_mm512_mask_cvtepi8_epi64`]
- * [x] [`_mm512_mask_cvtepu16_epi32`]
- * [x] [`_mm512_mask_cvtepu16_epi64`]
- * [x] [`_mm512_mask_cvtepu32_epi64`]
- * [x] [`_mm512_mask_cvtepu32_pd`]
- * [x] [`_mm512_mask_cvtepu32_ps`]
- * [x] [`_mm512_mask_cvtepu32lo_pd`]
- * [x] [`_mm512_mask_cvtepu8_epi32`]
- * [x] [`_mm512_mask_cvtepu8_epi64`]
- * [x] [`_mm512_mask_cvtpd_epi32`]
- * [x] [`_mm512_mask_cvtpd_epu32`]
- * [x] [`_mm512_mask_cvtpd_ps`]
- * [x] [`_mm512_mask_cvtpd_pslo`]
- * [x] [`_mm512_mask_cvtph_ps`]
- * [x] [`_mm512_mask_cvtps_epi32`]
- * [x] [`_mm512_mask_cvtps_epu32`]
- * [x] [`_mm512_mask_cvtps_pd`]
- * [x] [`_mm512_mask_cvtps_ph`]
- * [x] [`_mm512_mask_cvtpslo_pd`]
- * [x] [`_mm512_mask_cvtsepi32_epi16`]
- * [x] [`_mm512_mask_cvtsepi32_epi8`]
- * [ ] [`_mm512_mask_cvtsepi32_storeu_epi16`]
- * [ ] [`_mm512_mask_cvtsepi32_storeu_epi8`]
- * [x] [`_mm512_mask_cvtt_roundpd_epi32`]
- * [x] [`_mm512_mask_cvtt_roundpd_epu32`]
- * [x] [`_mm512_mask_cvtt_roundps_epi32`]
- * [x] [`_mm512_mask_cvtt_roundps_epu32`]
+ * [x] [`_mm512_mask_cvtusepi64_epi8`]
+ * [x] [`_mm512_maskz_cvtusepi64_epi8`]
+ * [x] [`_mm_cvtusepi64_epi8`]
+ * [x] [`_mm_mask_cvtusepi64_epi8`]
+ * [x] [`_mm_maskz_cvtusepi64_epi8`]
+ * [x] [`_mm256_cvtusepi64_epi8`]
+ * [x] [`_mm256_mask_cvtusepi64_epi8`]
+ * [x] [`_mm256_maskz_cvtusepi64_epi8`]
+ * [x] [`_mm512_mask_cvtusepi64_storeu_epi16`]
+ * [x] [`_mm_mask_cvtusepi64_storeu_epi16`]
+ * [x] [`_mm256_mask_cvtusepi64_storeu_epi16`]
+ * [x] [`_mm512_mask_cvtusepi64_storeu_epi32`]
+ * [x] [`_mm_mask_cvtusepi64_storeu_epi32`]
+ * [x] [`_mm256_mask_cvtusepi64_storeu_epi32`]
+ * [x] [`_mm512_mask_cvtusepi64_storeu_epi8`]
+ * [x] [`_mm_mask_cvtusepi64_storeu_epi8`]
+ * [x] [`_mm256_mask_cvtusepi64_storeu_epi8`]
+ * [x] [`_mm512_cvtsi512_si32`]
+ * [x] [`_mm512_cvttpd_epi32`]
* [x] [`_mm512_mask_cvttpd_epi32`]
+ * [x] [`_mm512_maskz_cvttpd_epi32`]
+ * [x] [`_mm_mask_cvttpd_epi32`]
+ * [x] [`_mm_maskz_cvttpd_epi32`]
+ * [x] [`_mm256_mask_cvttpd_epi32`]
+ * [x] [`_mm256_maskz_cvttpd_epi32`]
+ * [x] [`_mm512_cvttpd_epu32`]
* [x] [`_mm512_mask_cvttpd_epu32`]
+ * [x] [`_mm512_maskz_cvttpd_epu32`]
+ * [x] [`_mm_cvttpd_epu32`]
+ * [x] [`_mm_mask_cvttpd_epu32`]
+ * [x] [`_mm_maskz_cvttpd_epu32`]
+ * [x] [`_mm256_cvttpd_epu32`]
+ * [x] [`_mm256_mask_cvttpd_epu32`]
+ * [x] [`_mm256_maskz_cvttpd_epu32`]
+ * [x] [`_mm512_cvttps_epi32`]
* [x] [`_mm512_mask_cvttps_epi32`]
+ * [x] [`_mm512_maskz_cvttps_epi32`]
+ * [x] [`_mm_mask_cvttps_epi32`]
+ * [x] [`_mm_maskz_cvttps_epi32`]
+ * [x] [`_mm256_mask_cvttps_epi32`]
+ * [x] [`_mm256_maskz_cvttps_epi32`]
+ * [x] [`_mm512_cvttps_epu32`]
* [x] [`_mm512_mask_cvttps_epu32`]
- * [x] [`_mm512_mask_cvtusepi32_epi16`]
- * [x] [`_mm512_mask_cvtusepi32_epi8`]
- * [ ] [`_mm512_mask_cvtusepi32_storeu_epi16`]
- * [ ] [`_mm512_mask_cvtusepi32_storeu_epi8`]
- * [x] [`_mm512_mask_cvtusepi64_epi16`]
- * [x] [`_mm512_mask_cvtusepi64_epi32`]
- * [x] [`_mm512_mask_cvtusepi64_epi8`]
- * [ ] [`_mm512_mask_cvtusepi64_storeu_epi16`]
- * [ ] [`_mm512_mask_cvtusepi64_storeu_epi32`]
- * [ ] [`_mm512_mask_cvtusepi64_storeu_epi8`]
+ * [x] [`_mm512_maskz_cvttps_epu32`]
+ * [x] [`_mm_cvttps_epu32`]
+ * [x] [`_mm_mask_cvttps_epu32`]
+ * [x] [`_mm_maskz_cvttps_epu32`]
+ * [x] [`_mm256_cvttps_epu32`]
+ * [x] [`_mm256_mask_cvttps_epu32`]
+ * [x] [`_mm256_maskz_cvttps_epu32`]
+ * [x] [`_mm512_cvt_roundepi32_ps`]
+ * [x] [`_mm512_mask_cvt_roundepi32_ps`]
* [x] [`_mm512_maskz_cvt_roundepi32_ps`]
+ * [x] [`_mm512_cvt_roundepu32_ps`]
+ * [x] [`_mm512_mask_cvt_roundepu32_ps`]
* [x] [`_mm512_maskz_cvt_roundepu32_ps`]
+ * [x] [`_mm512_cvt_roundpd_epi32`]
+ * [x] [`_mm512_mask_cvt_roundpd_epi32`]
* [x] [`_mm512_maskz_cvt_roundpd_epi32`]
+ * [x] [`_mm512_cvt_roundpd_epu32`]
+ * [x] [`_mm512_mask_cvt_roundpd_epu32`]
* [x] [`_mm512_maskz_cvt_roundpd_epu32`]
+ * [x] [`_mm512_cvt_roundpd_ps`]
+ * [x] [`_mm512_mask_cvt_roundpd_ps`]
* [x] [`_mm512_maskz_cvt_roundpd_ps`]
+ * [x] [`_mm512_cvt_roundph_ps`]
+ * [x] [`_mm512_mask_cvt_roundph_ps`]
* [x] [`_mm512_maskz_cvt_roundph_ps`]
+ * [x] [`_mm512_cvt_roundps_epi32`]
+ * [x] [`_mm512_mask_cvt_roundps_epi32`]
* [x] [`_mm512_maskz_cvt_roundps_epi32`]
+ * [x] [`_mm512_cvt_roundps_epu32`]
+ * [x] [`_mm512_mask_cvt_roundps_epu32`]
* [x] [`_mm512_maskz_cvt_roundps_epu32`]
+ * [x] [`_mm512_cvt_roundps_pd`]
+ * [x] [`_mm512_mask_cvt_roundps_pd`]
* [x] [`_mm512_maskz_cvt_roundps_pd`]
- * [x] [`_mm512_maskz_cvtepi8_epi64`]
- * [x] [`_mm512_maskz_cvtepu16_epi32`]
- * [x] [`_mm512_maskz_cvtepu16_epi64`]
- * [x] [`_mm512_maskz_cvtepu32_epi64`]
- * [x] [`_mm512_maskz_cvtepu32_pd`]
- * [x] [`_mm512_maskz_cvtepu32_ps`]
- * [x] [`_mm512_maskz_cvtepu8_epi32`]
- * [x] [`_mm512_maskz_cvtepu8_epi64`]
- * [x] [`_mm512_maskz_cvtpd_epi32`]
- * [x] [`_mm512_maskz_cvtpd_epu32`]
- * [x] [`_mm512_maskz_cvtpd_ps`]
- * [x] [`_mm512_maskz_cvtph_ps`]
- * [x] [`_mm512_maskz_cvtps_epi32`]
- * [x] [`_mm512_maskz_cvtps_epu32`]
- * [x] [`_mm512_maskz_cvtps_pd`]
- * [x] [`_mm512_maskz_cvtps_ph`]
- * [x] [`_mm512_maskz_cvtsepi32_epi16`]
- * [x] [`_mm512_maskz_cvtsepi32_epi8`]
- * [x] [`_mm512_maskz_cvtsepi64_epi16`]
- * [x] [`_mm512_maskz_cvtsepi64_epi32`]
- * [x] [`_mm512_maskz_cvtsepi64_epi8`]
+ * [x] [`_mm512_cvtt_roundpd_epi32`]
+ * [x] [`_mm512_mask_cvtt_roundpd_epi32`]
* [x] [`_mm512_maskz_cvtt_roundpd_epi32`]
+ * [x] [`_mm512_cvtt_roundpd_epu32`]
+ * [x] [`_mm512_mask_cvtt_roundpd_epu32`]
* [x] [`_mm512_maskz_cvtt_roundpd_epu32`]
+ * [x] [`_mm512_cvtt_roundps_epi32`]
+ * [x] [`_mm512_mask_cvtt_roundps_epi32`]
* [x] [`_mm512_maskz_cvtt_roundps_epi32`]
+ * [x] [`_mm512_cvtt_roundps_epu32`]
+ * [x] [`_mm512_mask_cvtt_roundps_epu32`]
* [x] [`_mm512_maskz_cvtt_roundps_epu32`]
- * [x] [`_mm512_maskz_cvttpd_epi32`]
- * [x] [`_mm512_maskz_cvttpd_epu32`]
- * [x] [`_mm512_maskz_cvttps_epi32`]
- * [x] [`_mm512_maskz_cvttps_epu32`]
- * [x] [`_mm512_maskz_cvtusepi32_epi16`]
- * [x] [`_mm512_maskz_cvtusepi32_epi8`]
- * [x] [`_mm512_maskz_cvtusepi64_epi16`]
- * [x] [`_mm512_maskz_cvtusepi64_epi32`]
- * [x] [`_mm512_maskz_cvtusepi64_epi8`]
* [x] [`_mm_add_round_sd`]
* [x] [`_mm_add_round_ss`]
* [x] [`_mm_cmp_round_sd_mask`]
@@ -2193,60 +2342,60 @@
* [x] [`_mm_comi_round_sd`]
* [x] [`_mm_comi_round_ss`]
* [x] [`_mm_cvt_roundi32_ss`]
- * [ ] [`_mm_cvt_roundi64_sd`]
- * [ ] [`_mm_cvt_roundi64_ss`]
+ * [x] [`_mm_cvt_roundi64_sd`]
+ * [x] [`_mm_cvt_roundi64_ss`]
* [x] [`_mm_cvt_roundsd_i32`]
- * [ ] [`_mm_cvt_roundsd_i64`]
+ * [x] [`_mm_cvt_roundsd_i64`]
* [x] [`_mm_cvt_roundsd_si32`]
- * [ ] [`_mm_cvt_roundsd_si64`]
+ * [x] [`_mm_cvt_roundsd_si64`]
* [x] [`_mm_cvt_roundsd_ss`]
* [x] [`_mm_cvt_roundsd_u32`]
- * [ ] [`_mm_cvt_roundsd_u64`]
+ * [x] [`_mm_cvt_roundsd_u64`]
* [x] [`_mm_cvt_roundsi32_ss`]
- * [ ] [`_mm_cvt_roundsi64_sd`]
- * [ ] [`_mm_cvt_roundsi64_ss`]
+ * [x] [`_mm_cvt_roundsi64_sd`]
+ * [x] [`_mm_cvt_roundsi64_ss`]
* [x] [`_mm_cvt_roundss_i32`]
- * [ ] [`_mm_cvt_roundss_i64`]
+ * [x] [`_mm_cvt_roundss_i64`]
* [x] [`_mm_cvt_roundss_sd`]
* [x] [`_mm_cvt_roundss_si32`]
- * [ ] [`_mm_cvt_roundss_si64`]
+ * [x] [`_mm_cvt_roundss_si64`]
* [x] [`_mm_cvt_roundss_u32`]
- * [ ] [`_mm_cvt_roundss_u64`]
+ * [x] [`_mm_cvt_roundss_u64`]
* [x] [`_mm_cvt_roundu32_ss`]
- * [ ] [`_mm_cvt_roundu64_sd`]
- * [ ] [`_mm_cvt_roundu64_ss`]
+ * [x] [`_mm_cvt_roundu64_sd`]
+ * [x] [`_mm_cvt_roundu64_ss`]
* [x] [`_mm_cvti32_sd`]
* [x] [`_mm_cvti32_ss`]
- * [ ] [`_mm_cvti64_sd`]
- * [ ] [`_mm_cvti64_ss`]
+ * [x] [`_mm_cvti64_sd`]
+ * [x] [`_mm_cvti64_ss`]
* [x] [`_mm_cvtsd_i32`]
- * [ ] [`_mm_cvtsd_i64`]
+ * [x] [`_mm_cvtsd_i64`]
* [x] [`_mm_cvtsd_u32`]
- * [ ] [`_mm_cvtsd_u64`]
+ * [x] [`_mm_cvtsd_u64`]
* [x] [`_mm_cvtss_i32`]
- * [ ] [`_mm_cvtss_i64`]
+ * [x] [`_mm_cvtss_i64`]
* [x] [`_mm_cvtss_u32`]
- * [ ] [`_mm_cvtss_u64`]
+ * [x] [`_mm_cvtss_u64`]
* [x] [`_mm_cvtt_roundsd_i32`]
* [x] [`_mm_cvtt_roundsd_i64`]
* [x] [`_mm_cvtt_roundsd_si32`]
- * [ ] [`_mm_cvtt_roundsd_si64`]
+ * [x] [`_mm_cvtt_roundsd_si64`]
* [x] [`_mm_cvtt_roundsd_u32`]
- * [ ] [`_mm_cvtt_roundsd_u64`]
+ * [x] [`_mm_cvtt_roundsd_u64`]
* [x] [`_mm_cvtt_roundss_i32`]
- * [ ] [`_mm_cvtt_roundss_i64`]
+ * [x] [`_mm_cvtt_roundss_i64`]
* [x] [`_mm_cvtt_roundss_si32`]
- * [ ] [`_mm_cvtt_roundss_si64`]
+ * [x] [`_mm_cvtt_roundss_si64`]
* [x] [`_mm_cvtt_roundss_u32`]
- * [ ] [`_mm_cvtt_roundss_u64`]
+ * [x] [`_mm_cvtt_roundss_u64`]
* [x] [`_mm_cvttsd_i32`]
- * [ ] [`_mm_cvttsd_i64`]
+ * [x] [`_mm_cvttsd_i64`]
* [x] [`_mm_cvttsd_u32`]
- * [ ] [`_mm_cvttsd_u64`]
+ * [x] [`_mm_cvttsd_u64`]
* [x] [`_mm_cvttss_i32`]
- * [ ] [`_mm_cvttss_i64`]
+ * [x] [`_mm_cvttss_i64`]
* [x] [`_mm_cvttss_u32`]
- * [ ] [`_mm_cvttss_u64`]
+ * [x] [`_mm_cvttss_u64`]
* [x] [`_mm_cvtu32_sd`]
* [x] [`_mm_cvtu32_ss`]
* [x] [`_mm_cvtu64_sd`]
@@ -2333,8 +2482,8 @@
* [x] [`_mm_mask_getmant_round_ss`]
* [x] [`_mm_mask_getmant_sd`]
* [x] [`_mm_mask_getmant_ss`]
- * [ ] [`_mm_mask_load_sd`]
- * [ ] [`_mm_mask_load_ss`]
+ * [ ] [`_mm_mask_load_sd`] //need i1
+ * [ ] [`_mm_mask_load_ss`] //need i1
* [x] [`_mm_mask_max_round_sd`]
* [x] [`_mm_mask_max_round_ss`]
* [x] [`_mm_mask_max_sd`]
@@ -2365,8 +2514,8 @@
* [x] [`_mm_mask_sqrt_round_ss`]
* [x] [`_mm_mask_sqrt_sd`]
* [x] [`_mm_mask_sqrt_ss`]
- * [ ] [`_mm_mask_store_sd`]
- * [ ] [`_mm_mask_store_ss`]
+ * [ ] [`_mm_mask_store_sd`] //need i1
+ * [ ] [`_mm_mask_store_ss`] //need i1
* [x] [`_mm_mask_sub_round_sd`]
* [x] [`_mm_mask_sub_round_ss`]
* [x] [`_mm_mask_sub_sd`]
@@ -2411,8 +2560,8 @@
* [x] [`_mm_maskz_getmant_round_ss`]
* [x] [`_mm_maskz_getmant_sd`]
* [x] [`_mm_maskz_getmant_ss`]
- * [ ] [`_mm_maskz_load_sd`]
- * [ ] [`_mm_maskz_load_ss`]
+ * [ ] [`_mm_maskz_load_sd`] //need i1
+ * [ ] [`_mm_maskz_load_ss`] //need i1
* [x] [`_mm_maskz_max_round_sd`]
* [x] [`_mm_maskz_max_round_ss`]
* [x] [`_mm_maskz_max_sd`]
@@ -2469,4 +2618,16 @@
* [x] [`_mm_sqrt_round_ss`]
* [x] [`_mm_sub_round_sd`]
* [x] [`_mm_sub_round_ss`]
+ * [x] [`_mm512_int2mask`]
+ * [x] [`_mm512_kand`]
+ * [x] [`_mm512_kandn`]
+ * [x] [`_mm512_kmov`]
+ * [x] [`_mm512_knot`]
+ * [x] [`_mm512_kor`]
+ * [x] [`_mm512_kortestc`]
+ * [ ] [`_mm512_kortestz`] //not sure
+ * [x] [`_mm512_kunpackb`]
+ * [x] [`_mm512_kxnor`]
+ * [x] [`_mm512_kxor`]
+ * [x] [`_mm512_mask2int`]
diff --git a/crates/core_arch/avx512vbmi2.md b/crates/core_arch/avx512vbmi2.md
index 4bb6a0ed0c..693af9d930 100644
--- a/crates/core_arch/avx512vbmi2.md
+++ b/crates/core_arch/avx512vbmi2.md
@@ -12,12 +12,12 @@
* [x] [`_mm256_maskz_compress_epi8`]
* [x] [`_mm512_mask_compress_epi8`]
* [x] [`_mm512_maskz_compress_epi8`]
- * [_] [`_mm_mask_compressstoreu_epi16`]
- * [_] [`_mm256_mask_compressstoreu_epi16`]
- * [_] [`_mm512_mask_compressstoreu_epi16`]
- * [_] [`_mm_mask_compressstoreu_epi8`]
- * [_] [`_mm256_mask_compressstoreu_epi8`]
- * [_] [`_mm512_mask_compressstoreu_epi8`]
+ * [_] [`_mm_mask_compressstoreu_epi16`] //need i1
+ * [_] [`_mm256_mask_compressstoreu_epi16`] //need i1
+ * [_] [`_mm512_mask_compressstoreu_epi16`] //need i1
+ * [_] [`_mm_mask_compressstoreu_epi8`] //need i1
+ * [_] [`_mm256_mask_compressstoreu_epi8`] //need i1
+ * [_] [`_mm512_mask_compressstoreu_epi8`] //need i1
* [x] [`_mm_mask_expand_epi16`]
* [x] [`_mm_maskz_expand_epi16`]
* [x] [`_mm256_mask_expand_epi16`]
@@ -30,18 +30,18 @@
* [x] [`_mm256_maskz_expand_epi8`]
* [x] [`_mm512_mask_expand_epi8`]
* [x] [`_mm512_maskz_expand_epi8`]
- * [_] [`_mm_mask_expandloadu_epi16`]
- * [_] [`_mm_maskz_expandloadu_epi16`]
- * [_] [`_mm256_mask_expandloadu_epi16`]
- * [_] [`_mm256_maskz_expandloadu_epi16`]
- * [_] [`_mm512_mask_expandloadu_epi16`]
- * [_] [`_mm512_maskz_expandloadu_epi16`]
- * [_] [`_mm_mask_expandloadu_epi8`]
- * [_] [`_mm_maskz_expandloadu_epi8`]
- * [_] [`_mm256_mask_expandloadu_epi8`]
- * [_] [`_mm256_maskz_expandloadu_epi8`]
- * [_] [`_mm512_mask_expandloadu_epi8`]
- * [_] [`_mm512_maskz_expandloadu_epi8`]
+ * [_] [`_mm_mask_expandloadu_epi16`] //need i1
+ * [_] [`_mm_maskz_expandloadu_epi16`] //need i1
+ * [_] [`_mm256_mask_expandloadu_epi16`] //need i1
+ * [_] [`_mm256_maskz_expandloadu_epi16`] //need i1
+ * [_] [`_mm512_mask_expandloadu_epi16`] //need i1
+ * [_] [`_mm512_maskz_expandloadu_epi16`] //need i1
+ * [_] [`_mm_mask_expandloadu_epi8`] //need i1
+ * [_] [`_mm_maskz_expandloadu_epi8`] //need i1
+ * [_] [`_mm256_mask_expandloadu_epi8`] //need i1
+ * [_] [`_mm256_maskz_expandloadu_epi8`] //need i1
+ * [_] [`_mm512_mask_expandloadu_epi8`] //need i1
+ * [_] [`_mm512_maskz_expandloadu_epi8`] //need i1
* [x] [`_mm_mask_shldi_epi16`]
* [x] [`_mm_maskz_shldi_epi16`]
* [x] [`_mm_shldi_epi16`]
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 0e5a1ba461..f72f56a355 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -10696,6 +10696,52 @@ pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
))
}
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtps_epi32&expand=1735)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2dq))]
+pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
+ let convert = _mm256_cvtps_epi32(a);
+ transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtps_epi32&expand=1736)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2dq))]
+pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
+ let convert = _mm256_cvtps_epi32(a);
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, convert.as_i32x8(), zero))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtps_epi32&expand=1732)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2dq))]
+pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
+ let convert = _mm_cvtps_epi32(a);
+ transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtps_epi32&expand=1733)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2dq))]
+pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
+ let convert = _mm_cvtps_epi32(a);
+ let zero = _mm_setzero_si128().as_i32x4();
+ transmute(simd_select_bitmask(k, convert.as_i32x4(), zero))
+}
+
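
The four wrappers above all follow the same pattern: perform the conversion with the existing AVX intrinsic, then blend against `src` (writemask) or zero (zeromask) via `simd_select_bitmask`. A minimal usage sketch of that behavior, not part of this patch — the demo function name, lane values, and mask are illustrative, and it assumes a nightly toolchain plus a CPU supporting avx512f and avx512vl:

```rust
// Hypothetical demo, not part of this patch.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn cvtps_epi32_mask_demo() {
    use core::arch::x86_64::*;
    let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
    let src = _mm256_set1_epi32(-1);
    // Only the low four mask bits are set, so lanes 4..8 either keep `src`
    // (writemask variant) or are zeroed out (zeromask variant).
    let m = _mm256_mask_cvtps_epi32(src, 0b0000_1111, a); // [1, 2, 3, 4, -1, -1, -1, -1]
    let z = _mm256_maskz_cvtps_epi32(0b0000_1111, a);     // [1, 2, 3, 4, 0, 0, 0, 0]
    let _ = (m, z);
}
```
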
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_epu32&expand=1755)
@@ -10741,6 +10787,82 @@ pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
))
}
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_epu32&expand=1752)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2udq))]
+pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
+ transmute(vcvtps2udq256(
+ a.as_f32x8(),
+ _mm256_setzero_si256().as_u32x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtps_epu32&expand=1753)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2udq))]
+pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
+ transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtps_epu32&expand=1754)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2udq))]
+pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
+ transmute(vcvtps2udq256(
+ a.as_f32x8(),
+ _mm256_setzero_si256().as_u32x8(),
+ k,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epu32&expand=1749)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2udq))]
+pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i {
+ transmute(vcvtps2udq128(
+ a.as_f32x4(),
+ _mm_setzero_si128().as_u32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtps_epu32&expand=1750)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2udq))]
+pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
+ transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtps_epu32&expand=1751)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2udq))]
+pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
+ transmute(vcvtps2udq128(
+ a.as_f32x4(),
+ _mm_setzero_si128().as_u32x4(),
+ k,
+ ))
+}
+
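
Unlike the signed conversions, the unsigned ones above have no pre-AVX-512 instruction to fall back on, so they call the masked LLVM intrinsics `vcvtps2udq256`/`vcvtps2udq128` (presumably declared next to the existing 512-bit `vcvtps2udq` binding elsewhere in this file) and pass the mask straight through. What the unsigned forms buy is range above `i32::MAX`; a hypothetical sketch, not part of this patch, with a value chosen to be exactly representable in `f32` (avx512f and avx512vl assumed):

```rust
// Hypothetical demo, not part of this patch.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn cvtps_epu32_range_demo() {
    use core::arch::x86_64::*;
    // 3_000_000_000.0 is exactly representable in f32 and exceeds i32::MAX,
    // but fits comfortably in u32.
    let a = _mm_set1_ps(3_000_000_000.0);
    let u = _mm_cvtps_epu32(a);                    // every lane holds 3_000_000_000u32
    let masked = _mm_maskz_cvtps_epu32(0b0011, a); // lanes 2 and 3 are zeroed
    let _ = (u, masked);
}
```
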
/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_pd&expand=1769)
@@ -10861,6 +10983,270 @@ pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
))
}
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ps&expand=1710)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ps))]
+pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
+ let convert = _mm256_cvtpd_ps(a);
+ transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ps&expand=1711)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ps))]
+pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
+ let convert = _mm256_cvtpd_ps(a);
+ let zero = _mm_setzero_ps().as_f32x4();
+ transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ps&expand=1707)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ps))]
+pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
+ let convert = _mm_cvtpd_ps(a);
+ transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ps&expand=1708)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2ps))]
+pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
+ let convert = _mm_cvtpd_ps(a);
+ let zero = _mm_setzero_ps().as_f32x4();
+ transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_epi32&expand=1675)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
+ transmute(vcvtpd2dq(
+ a.as_f64x8(),
+ _mm256_setzero_si256().as_i32x8(),
+ 0b11111111,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_epi32&expand=1676)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
+ transmute(vcvtpd2dq(
+ a.as_f64x8(),
+ src.as_i32x8(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_epi32&expand=1677)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
+ transmute(vcvtpd2dq(
+ a.as_f64x8(),
+ _mm256_setzero_si256().as_i32x8(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_epi32&expand=1673)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
+ let convert = _mm256_cvtpd_epi32(a);
+ transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_epi32&expand=1674)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
+ let convert = _mm256_cvtpd_epi32(a);
+ transmute(simd_select_bitmask(
+ k,
+ convert.as_i32x4(),
+ _mm_setzero_si128().as_i32x4(),
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_epi32&expand=1670)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
+ let convert = _mm_cvtpd_epi32(a);
+ transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_epi32&expand=1671)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2dq))]
+pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
+ let convert = _mm_cvtpd_epi32(a);
+ transmute(simd_select_bitmask(
+ k,
+ convert.as_i32x4(),
+ _mm_setzero_si128().as_i32x4(),
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_epu32&expand=1693)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
+ transmute(vcvtpd2udq(
+ a.as_f64x8(),
+ _mm256_setzero_si256().as_u32x8(),
+ 0b11111111,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_epu32&expand=1694)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
+ transmute(vcvtpd2udq(
+ a.as_f64x8(),
+ src.as_u32x8(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_epu32&expand=1695)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
+ transmute(vcvtpd2udq(
+ a.as_f64x8(),
+ _mm256_setzero_si256().as_u32x8(),
+ k,
+ _MM_FROUND_CUR_DIRECTION,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_epu32&expand=1690)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
+ transmute(vcvtpd2udq256(
+ a.as_f64x4(),
+ _mm_setzero_si128().as_u32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_epu32&expand=1691)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
+ transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_epu32&expand=1692)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
+ transmute(vcvtpd2udq256(
+ a.as_f64x4(),
+ _mm_setzero_si128().as_u32x4(),
+ k,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epu32&expand=1687)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
+ transmute(vcvtpd2udq128(
+ a.as_f64x2(),
+ _mm_setzero_si128().as_u32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_epu32&expand=1688)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
+ transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_epu32&expand=1689)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtpd2udq))]
+pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
+ transmute(vcvtpd2udq128(
+ a.as_f64x2(),
+ _mm_setzero_si128().as_u32x4(),
+ k,
+ ))
+}
+
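
Note that this `cvtpd` family rounds according to the current MXCSR mode (`_MM_FROUND_CUR_DIRECTION`) rather than truncating — truncation is the separate `cvttpd` family tracked in the checklist. A hypothetical sketch of the default round-to-nearest-even behavior, not part of this patch (avx512f assumed, input values illustrative):

```rust
// Hypothetical demo, not part of this patch.
#[target_feature(enable = "avx512f")]
unsafe fn cvtpd_epi32_rounding_demo() {
    use core::arch::x86_64::*;
    // Under the default MXCSR rounding mode (round to nearest, ties to even),
    // halfway cases round to the even integer.
    let a = _mm512_setr_pd(0.5, 1.5, 2.5, 3.5, -1.5, -2.5, 8.0, 9.75);
    let r = _mm512_cvtpd_epi32(a); // [0, 2, 2, 4, -2, -2, 8, 10]
    // With a writemask, lanes 4..8 keep the value 7 from `src`.
    let masked = _mm512_mask_cvtpd_epi32(_mm256_set1_epi32(7), 0b0000_1111, a);
    let _ = (r, masked);
}
```
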
/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_pslo&expand=1715)
@@ -11016,6 +11402,52 @@ pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi8_epi64&expand=1542)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsxbq))]
+pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
+}
+
+/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsxbq))]
+pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi8_epi64&expand=1539)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsxbq))]
+pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepi8_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
+}
+
+/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi8_epi64&expand=1540)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsxbq))]
+pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepi8_epi64(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi32&expand=1621)
@@ -11050,6 +11482,52 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi32&expand=1619)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbd))]
+pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
+ transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
+}
+
+/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbd))]
+pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi32&expand=1616)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbd))]
+pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu8_epi32(a).as_i32x4();
+ transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
+}
+
+/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbd))]
+pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu8_epi32(a).as_i32x4();
+ let zero = _mm_setzero_si128().as_i32x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi64&expand=1630)
@@ -11085,6 +11563,52 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi64&expand=1628)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbq))]
+pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
+}
+
+/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbq))]
+pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi64&expand=1625)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbq))]
+pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu8_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
+}
+
+/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi64&expand=1626)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxbq))]
+pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu8_epi64(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
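
The `cvtepi8`/`cvtepu8` pairs above differ only in sign- versus zero-extension before the usual mask select, and only the low bytes of `a` participate. A small contrast sketch, not part of this patch (avx512f and avx512vl assumed, values illustrative):

```rust
// Hypothetical demo, not part of this patch.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn extend_demo() {
    use core::arch::x86_64::*;
    // Only the low 2 bytes of `a` feed the two 64-bit result lanes.
    let a = _mm_setr_epi8(-1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let s = _mm_maskz_cvtepi8_epi64(0b11, a); // sign extend: lanes [-1, 2]
    let z = _mm_maskz_cvtepu8_epi64(0b11, a); // zero extend: lanes [255, 2]
    let _ = (s, z);
}
```
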
+
/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi32&expand=1389)
@@ -11279,6 +11803,52 @@ pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_epi32&expand=1551)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwd))]
+pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
+ transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
+}
+
+/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwd))]
+pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_epi32&expand=1548)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwd))]
+pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu16_epi32(a).as_i32x4();
+ transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
+}
+
+/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_epi32&expand=1549)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwd))]
+pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu16_epi32(a).as_i32x4();
+ let zero = _mm_setzero_si128().as_i32x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_epi64&expand=1562)
@@ -11313,6 +11883,52 @@ pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_epi64&expand=1560)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwq))]
+pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
+}
+
+/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwq))]
+pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_epi64&expand=1557)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwq))]
+pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu16_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
+}
+
+/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_epi64&expand=1558)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxwq))]
+pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu16_epi64(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_epi64&expand=1428)
@@ -11427,6 +12043,52 @@ pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_epi64&expand=1569)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxdq))]
+pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
+}
+
+/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxdq))]
+pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
+ let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_epi64&expand=1566)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxdq))]
+pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu32_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
+}
+
+/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_epi64&expand=1567)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovzxdq))]
+pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let convert = _mm_cvtepu32_epi64(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ps&expand=1455)
@@ -11655,6 +12317,75 @@ pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
transmute(simd_select_bitmask(k, convert, zero))
}
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_pd&expand=1577)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
+ let a = a.as_u32x4();
+    transmute::<f64x4, _>(simd_cast(a))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_pd&expand=1578)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
+ let convert = _mm256_cvtepu32_pd(a).as_f64x4();
+ transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_pd&expand=1579)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
+ let convert = _mm256_cvtepu32_pd(a).as_f64x4();
+ let zero = _mm256_setzero_pd().as_f64x4();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_pd&expand=1574)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
+ let a = a.as_u32x4();
+ let u64: u32x2 = simd_shuffle2(a, a, [0, 1]);
+    transmute::<f64x2, _>(simd_cast(u64))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_pd&expand=1575)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
+ let convert = _mm_cvtepu32_pd(a).as_f64x2();
+ transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_pd&expand=1576)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtudq2pd))]
+pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
+ let convert = _mm_cvtepu32_pd(a).as_f64x2();
+ let zero = _mm_setzero_pd().as_f64x2();
+ transmute(simd_select_bitmask(k, convert, zero))
+}
+
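+// Illustrative sketch (not part of the original patch): the unsigned
+// conversion reads each lane as a u32, so the bit pattern 0xFFFF_FFFF becomes
+// 4294967295.0 rather than the -1.0 that the signed `_mm256_cvtepi32_pd`
+// would produce. `demo_cvtepu32_pd` is a hypothetical helper for exposition.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f,avx512vl")]
+unsafe fn demo_cvtepu32_pd() -> __m256d {
+    // Every 32-bit lane holds the bit pattern 0xFFFF_FFFF (u32::MAX).
+    let a = _mm_set1_epi32(-1);
+    _mm256_cvtepu32_pd(a)
+}
+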
/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32lo_pd&expand=1464)
@@ -12249,6 +12980,74 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i
))
}
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi32_epi16&expand=1816)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
+ transmute(vpmovsdw256(
+ a.as_i32x8(),
+ _mm_setzero_si128().as_i16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsdw256(a.as_i32x8(), _mm_setzero_si128().as_i16x8(), k))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi32_epi16&expand=1813)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
+ transmute(vpmovsdw128(
+ a.as_i32x4(),
+ _mm_setzero_si128().as_i16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_epi16&expand=1814)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k))
+}
+
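+// Illustrative sketch (not part of the original patch): the saturating
+// narrowing clamps out-of-range values to the i16 range instead of
+// truncating bits. `demo_cvtsepi32_epi16_saturates` is a hypothetical helper
+// for exposition.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f,avx512vl")]
+unsafe fn demo_cvtsepi32_epi16_saturates() -> __m128i {
+    // Lanes 0..=3 hold -1, 1, -100_000 and 100_000.
+    let a = _mm_set_epi32(100_000, -100_000, 1, -1);
+    // The low four 16-bit lanes of the result are -1, 1, -32768, 32767;
+    // the upper 64 bits of the destination are zeroed.
+    _mm_cvtsepi32_epi16(a)
+}
+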
/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi32_epi8&expand=1828)
@@ -12283,6 +13082,74 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k))
}
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi32_epi8&expand=1825)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
+ transmute(vpmovsdb256(
+ a.as_i32x8(),
+ _mm_setzero_si128().as_i8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi32_epi8&expand=1822)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
+ transmute(vpmovsdb128(
+ a.as_i32x4(),
+ _mm_setzero_si128().as_i8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_epi8&expand=1823)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k))
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k))
+}
+
/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi32&expand=1852)
@@ -12317,6 +13184,74 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k))
}
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi64_epi32&expand=1849)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
+ transmute(vpmovsqd256(
+ a.as_i64x4(),
+ _mm_setzero_si128().as_i32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsqd256(a.as_i64x4(), _mm_setzero_si128().as_i32x4(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi64_epi32&expand=1846)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
+ transmute(vpmovsqd128(
+ a.as_i64x2(),
+ _mm_setzero_si128().as_i32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_epi32&expand=1847)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k))
+}
+
/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi16&expand=1843)
@@ -12351,6 +13286,74 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k))
}
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi64_epi16&expand=1840)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
+ transmute(vpmovsqw256(
+ a.as_i64x4(),
+ _mm_setzero_si128().as_i16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi64_epi16&expand=1837)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
+ transmute(vpmovsqw128(
+ a.as_i64x2(),
+ _mm_setzero_si128().as_i16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_epi16&expand=1838)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k))
+}
+
/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi8&expand=1861)
@@ -12385,6 +13388,74 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi64_epi8&expand=1858)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
+ transmute(vpmovsqb256(
+ a.as_i64x4(),
+ _mm_setzero_si128().as_i8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovsqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi64_epi8&expand=1855)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
+ transmute(vpmovsqb128(
+ a.as_i64x2(),
+ _mm_setzero_si128().as_i8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_epi8&expand=1856)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k))
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovsqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k))
+}
+
/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi32_epi16&expand=2054)
@@ -12423,6 +13494,82 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i
))
}
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi32_epi16&expand=2051)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
+ transmute(vpmovusdw256(
+ a.as_u32x8(),
+ _mm_setzero_si128().as_u16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusdw256(
+ a.as_u32x8(),
+ _mm_setzero_si128().as_u16x8(),
+ k,
+ ))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi32_epi16&expand=2048)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
+ transmute(vpmovusdw128(
+ a.as_u32x4(),
+ _mm_setzero_si128().as_u16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_epi16&expand=2049)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusdw128(
+ a.as_u32x4(),
+ _mm_setzero_si128().as_u16x8(),
+ k,
+ ))
+}
+
/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi32_epi8&expand=2063)
@@ -12457,6 +13604,82 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i
transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k))
}
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi32_epi8&expand=2060)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
+ transmute(vpmovusdb256(
+ a.as_u32x8(),
+ _mm_setzero_si128().as_u8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusdb256(
+ a.as_u32x8(),
+ _mm_setzero_si128().as_u8x16(),
+ k,
+ ))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi32_epi8&expand=2057)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
+ transmute(vpmovusdb128(
+ a.as_u32x4(),
+ _mm_setzero_si128().as_u8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_epi8&expand=2058)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k))
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusdb128(
+ a.as_u32x4(),
+ _mm_setzero_si128().as_u8x16(),
+ k,
+ ))
+}
+
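+// Illustrative sketch (not part of the original patch): the source lanes are
+// read as unsigned, so every value above 255 saturates to u8::MAX.
+// `demo_cvtusepi32_epi8_saturates` is a hypothetical helper for exposition.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "avx512f,avx512vl")]
+unsafe fn demo_cvtusepi32_epi8_saturates() -> __m128i {
+    // Lanes 0..=3 hold 1, 255, 300 and 0xFFFF_FFFF (u32::MAX, written as -1).
+    let a = _mm_set_epi32(-1, 300, 255, 1);
+    // The low four bytes of the result are 1, 255, 255, 255; the remaining
+    // bytes of the destination are zeroed.
+    _mm_cvtusepi32_epi8(a)
+}
+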
/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi32&expand=2087)
@@ -12495,6 +13718,82 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i
))
}
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi64_epi32&expand=2084)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
+ transmute(vpmovusqd256(
+ a.as_u64x4(),
+ _mm_setzero_si128().as_u32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusqd256(
+ a.as_u64x4(),
+ _mm_setzero_si128().as_u32x4(),
+ k,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi64_epi32&expand=2081)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
+ transmute(vpmovusqd128(
+ a.as_u64x2(),
+ _mm_setzero_si128().as_u32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_epi32&expand=2082)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusqd128(
+ a.as_u64x2(),
+ _mm_setzero_si128().as_u32x4(),
+ k,
+ ))
+}
+
/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi16&expand=2078)
@@ -12529,6 +13828,82 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i
transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k))
}
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi64_epi16&expand=2075)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
+ transmute(vpmovusqw256(
+ a.as_u64x4(),
+ _mm_setzero_si128().as_u16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusqw256(
+ a.as_u64x4(),
+ _mm_setzero_si128().as_u16x8(),
+ k,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi64_epi16&expand=2072)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
+ transmute(vpmovusqw128(
+ a.as_u64x2(),
+ _mm_setzero_si128().as_u16x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_epi16&expand=2073)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusqw128(
+ a.as_u64x2(),
+ _mm_setzero_si128().as_u16x8(),
+ k,
+ ))
+}
+
/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi8&expand=2096)
@@ -12563,6 +13938,82 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k))
}
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi64_epi8&expand=2093)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
+ transmute(vpmovusqb256(
+ a.as_u64x4(),
+ _mm_setzero_si128().as_u8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
+ transmute(vpmovusqb256(
+ a.as_u64x4(),
+ _mm_setzero_si128().as_u8x16(),
+ k,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi64_epi8&expand=2090)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
+ transmute(vpmovusqb128(
+ a.as_u64x2(),
+ _mm_setzero_si128().as_u8x16(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_epi8&expand=2091)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k))
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
+ transmute(vpmovusqb128(
+ a.as_u64x2(),
+ _mm_setzero_si128().as_u8x16(),
+ k,
+ ))
+}
+
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
@@ -13271,8 +14722,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) ->
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
-///
-/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
@@ -13282,28 +14732,27 @@ pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) ->
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvt_roundps_ph&expand=1352)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
-#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_cvt_roundps_ph(
src: __m128i,
k: __mmask8,
a: __m256,
- sae: i32,
+ imm8: i32,
) -> __m128i {
let a = a.as_f32x8();
let src = src.as_i16x8();
macro_rules! call {
- ($imm4:expr) => {
- vcvtps2ph256(a, $imm4, src, k)
+ ($imm8:expr) => {
+ vcvtps2ph256(a, $imm8, src, k)
};
}
- let r = constify_imm4_round!(sae, call);
+ let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
-///
-/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
@@ -13313,23 +14762,22 @@ pub unsafe fn _mm256_mask_cvt_roundps_ph(
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
-#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(2)]
-pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, sae: i32) -> __m128i {
+pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, imm8: i32) -> __m128i {
let a = a.as_f32x8();
let zero = _mm_setzero_si128().as_i16x8();
macro_rules! call {
- ($imm4:expr) => {
- vcvtps2ph256(a, $imm4, zero, k)
+ ($imm8:expr) => {
+ vcvtps2ph256(a, $imm8, zero, k)
};
}
- let r = constify_imm4_round!(sae, call);
+ let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
-///
-/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
@@ -13339,23 +14787,22 @@ pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, sae: i32) -> _
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundps_ph&expand=1350)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
-#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(3)]
-pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, sae: i32) -> __m128i {
+pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, imm8: i32) -> __m128i {
let a = a.as_f32x4();
let src = src.as_i16x8();
macro_rules! call {
- ($imm4:expr) => {
- vcvtps2ph128(a, $imm4, src, k)
+ ($imm8:expr) => {
+ vcvtps2ph128(a, $imm8, src, k)
};
}
- let r = constify_imm4_round!(sae, call);
+ let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
-///
-/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
@@ -13365,17 +14812,17 @@ pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, sae:
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundps_ph&expand=1351)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
-#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(2)]
-pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, sae: i32) -> __m128i {
+pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i {
let a = a.as_f32x4();
let zero = _mm_setzero_si128().as_i16x8();
macro_rules! call {
- ($imm4:expr) => {
- vcvtps2ph128(a, $imm4, zero, k)
+ ($imm8:expr) => {
+ vcvtps2ph128(a, $imm8, zero, k)
};
}
- let r = constify_imm4_round!(sae, call);
+ let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
@@ -13439,6 +14886,106 @@ pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256
transmute(r)
}
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
+/// _MM_FROUND_TO_NEG_INF // round down\
+/// _MM_FROUND_TO_POS_INF // round up\
+/// _MM_FROUND_TO_ZERO // truncate\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtps_ph&expand=1776)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm256_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m256, imm8: i32) -> __m128i {
+ let a = a.as_f32x8();
+ let src = src.as_i16x8();
+ macro_rules! call {
+ ($imm8:expr) => {
+ vcvtps2ph256(a, $imm8, src, k)
+ };
+ }
+ let r = constify_imm8_sae!(imm8, call);
+ transmute(r)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
+/// _MM_FROUND_TO_NEG_INF // round down\
+/// _MM_FROUND_TO_POS_INF // round up\
+/// _MM_FROUND_TO_ZERO // truncate\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtps_ph&expand=1777)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256, imm8: i32) -> __m128i {
+ let a = a.as_f32x8();
+ let zero = _mm_setzero_si128().as_i16x8();
+ macro_rules! call {
+ ($imm8:expr) => {
+ vcvtps2ph256(a, $imm8, zero, k)
+ };
+ }
+ let r = constify_imm8_sae!(imm8, call);
+ transmute(r)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
+/// _MM_FROUND_TO_NEG_INF // round down\
+/// _MM_FROUND_TO_POS_INF // round up\
+/// _MM_FROUND_TO_ZERO // truncate\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtps_ph&expand=1773)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
+#[rustc_args_required_const(3)]
+pub unsafe fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m128, imm8: i32) -> __m128i {
+ let a = a.as_f32x4();
+ let src = src.as_i16x8();
+ macro_rules! call {
+ ($imm8:expr) => {
+ vcvtps2ph128(a, $imm8, src, k)
+ };
+ }
+ let r = constify_imm8_sae!(imm8, call);
+ transmute(r)
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
+/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
+/// _MM_FROUND_TO_NEAREST_INT // round to nearest\
+/// _MM_FROUND_TO_NEG_INF // round down\
+/// _MM_FROUND_TO_POS_INF // round up\
+/// _MM_FROUND_TO_ZERO // truncate\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtps_ph&expand=1774)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i {
+ let a = a.as_f32x4();
+ let zero = _mm_setzero_si128().as_i16x8();
+ macro_rules! call {
+ ($imm8:expr) => {
+ vcvtps2ph128(a, $imm8, zero, k)
+ };
+ }
+ let r = constify_imm8_sae!(imm8, call);
+ transmute(r)
+}
+
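A minimal usage sketch (editorial, not part of the patch): the masked `cvtps_ph` variants added above take the rounding mode as a const immediate and fall back to `src` for inactive lanes. The helper name and mask value are illustrative, and the intrinsics are assumed to be reachable through `std::arch::x86_64`.

```rust
use std::arch::x86_64::*;

// Hypothetical helper: narrow eight f32 lanes to f16. Lanes 0, 2, 4 and 6 are
// converted; lanes 1, 3, 5 and 7 of the result are copied from `src`.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn narrow_to_f16(src: __m128i, a: __m256) -> __m128i {
    let k: __mmask8 = 0b0101_0101;
    _mm256_mask_cvtps_ph(src, k, a, _MM_FROUND_TO_NEAREST_INT)
}
```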
/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
@@ -13549,6 +15096,52 @@ pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
))
}
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_ps&expand=1721)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2ps))]
+pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
+ let convert = _mm256_cvtph_ps(a);
+ transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_ps&expand=1722)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2ps))]
+pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
+ let convert = _mm256_cvtph_ps(a);
+ let zero = _mm256_setzero_ps().as_f32x8();
+ transmute(simd_select_bitmask(k, convert.as_f32x8(), zero))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_ps&expand=1718)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2ps))]
+pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
+ let convert = _mm_cvtph_ps(a);
+ transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
+}
+
+/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_ps&expand=1719)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvtph2ps))]
+pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
+ let convert = _mm_cvtph_ps(a);
+ let zero = _mm_setzero_ps().as_f32x4();
+ transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
+}
+
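As a rough counterpart for the widening direction (again a sketch, not part of the diff), the zeromask form can be used to widen only part of a vector; the mask below is illustrative.

```rust
use std::arch::x86_64::*;

// Hypothetical helper: widen the eight f16 values in `a` to f32, keeping only
// the low four results and zeroing the upper four lanes via the zeromask.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn widen_low_half(a: __m128i) -> __m256 {
    _mm256_maskz_cvtph_ps(0b0000_1111, a)
}
```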
/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
@@ -13624,7 +15217,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32)
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i {
let a = a.as_f32x16();
- let zero = _mm512_setzero_si512().as_i32x16();
+ let zero = _mm512_setzero_si512().as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a, zero, 0b11111111_11111111, $imm4)
@@ -13649,7 +15242,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
sae: i32,
) -> __m512i {
let a = a.as_f32x16();
- let src = src.as_i32x16();
+ let src = src.as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a, src, k, $imm4)
@@ -13669,7 +15262,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
let a = a.as_f32x16();
- let zero = _mm512_setzero_si512().as_i32x16();
+ let zero = _mm512_setzero_si512().as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a, zero, k, $imm4)
@@ -13834,6 +15427,54 @@ pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
))
}
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttps_epi32&expand=1982)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2dq))]
+pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
+ transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttps_epi32&expand=1983)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2dq))]
+pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
+ transmute(vcvttps2dq256(
+ a.as_f32x8(),
+ _mm256_setzero_si256().as_i32x8(),
+ k,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttps_epi32&expand=1979)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2dq))]
+pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
+ transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttps_epi32&expand=1980)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2dq))]
+pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
+ transmute(vcvttps2dq128(
+ a.as_f32x4(),
+ _mm_setzero_si128().as_i32x4(),
+ k,
+ ))
+}
+
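A short usage sketch for the masked truncating conversion above (editorial; helper name and mask are illustrative): inactive lanes keep whatever integer values `src` already held.

```rust
use std::arch::x86_64::*;

// Hypothetical helper: truncate the upper four f32 lanes toward zero; the
// lower four lanes of the result are taken unchanged from `src`.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn truncate_upper_half(src: __m256i, a: __m256) -> __m256i {
    _mm256_mask_cvttps_epi32(src, 0b1111_0000, a)
}
```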
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttps_epu32&expand=2002)
@@ -13843,7 +15484,7 @@ pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
- _mm512_setzero_si512().as_i32x16(),
+ _mm512_setzero_si512().as_u32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
@@ -13858,7 +15499,7 @@ pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
- src.as_i32x16(),
+ src.as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
@@ -13873,12 +15514,88 @@ pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) ->
pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
- _mm512_setzero_si512().as_i32x16(),
+ _mm512_setzero_si512().as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttps_epu32&expand=1999)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
+ transmute(vcvttps2udq256(
+ a.as_f32x8(),
+ _mm256_setzero_si256().as_u32x8(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttps_epu32&expand=2000)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
+ transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttps_epu32&expand=2001)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
+ transmute(vcvttps2udq256(
+ a.as_f32x8(),
+ _mm256_setzero_si256().as_u32x8(),
+ k,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epu32&expand=1996)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i {
+ transmute(vcvttps2udq128(
+ a.as_f32x4(),
+ _mm_setzero_si128().as_u32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttps_epu32&expand=1997)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
+ transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k))
+}
+
+/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttps_epu32&expand=1998)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttps2udq))]
+pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
+ transmute(vcvttps2udq128(
+ a.as_f32x4(),
+ _mm_setzero_si128().as_u32x4(),
+ k,
+ ))
+}
+
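A minimal sketch of the unsigned truncating conversion (not part of the patch); it assumes the inputs are non-negative and in range, since out-of-range handling is not shown here.

```rust
use std::arch::x86_64::*;

// Hypothetical helper: convert four non-negative f32 values to u32 with
// truncation toward zero.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn to_u32_truncated(a: __m128) -> __m128i {
    _mm_cvttps_epu32(a)
}
```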
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
@@ -13944,6 +15661,54 @@ pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
))
}
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttpd_epi32&expand=1945)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2dq))]
+pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
+ transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttpd_epi32&expand=1946)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2dq))]
+pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
+ transmute(vcvttpd2dq256(
+ a.as_f64x4(),
+ _mm_setzero_si128().as_i32x4(),
+ k,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttpd_epi32&expand=1942)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2dq))]
+pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
+ transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttpd_epi32&expand=1943)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2dq))]
+pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
+ transmute(vcvttpd2dq128(
+ a.as_f64x2(),
+ _mm_setzero_si128().as_i32x4(),
+ k,
+ ))
+}
+
/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttpd_epu32&expand=1965)
@@ -13989,6 +15754,82 @@ pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
))
}
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttpd_epu32&expand=1962)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq))]
+pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
+ transmute(vcvttpd2udq256(
+ a.as_f64x4(),
+ _mm_setzero_si128().as_i32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttpd_epu32&expand=1963)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq))]
+pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
+ transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttpd_epu32&expand=1964)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq))]
+pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
+ transmute(vcvttpd2udq256(
+ a.as_f64x4(),
+ _mm_setzero_si128().as_i32x4(),
+ k,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epu32&expand=1959)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq))]
+pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
+ transmute(vcvttpd2udq128(
+ a.as_f64x2(),
+ _mm_setzero_si128().as_i32x4(),
+ 0b11111111,
+ ))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttpd_epu32&expand=1960)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq))]
+pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
+ transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k))
+}
+
+/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttpd_epu32&expand=1961)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vcvttpd2udq))]
+pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
+ transmute(vcvttpd2udq128(
+ a.as_f64x2(),
+ _mm_setzero_si128().as_i32x4(),
+ k,
+ ))
+}
+
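For the double-precision variants, the result is narrower than the input: four f64 lanes produce the low four u32 lanes of a 128-bit vector. A hedged sketch (helper name and mask are illustrative):

```rust
use std::arch::x86_64::*;

// Hypothetical helper: truncate lanes 0..=2 of `a` to u32; lane 3 of the
// result is copied from `src` because its mask bit is clear.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn narrow_doubles(src: __m128i, a: __m256d) -> __m128i {
    _mm256_mask_cvttpd_epu32(src, 0b0000_0111, a)
}
```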
/// Returns vector of type `__m512d` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_pd&expand=5018)
@@ -23876,6 +25717,17 @@ pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
transmute(a)
}
+/// Copy the lower 32-bit integer in a to dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsi512_si32&expand=1882)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(vmovd))]
+pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
+ let extract: i32 = simd_extract(a.as_i32x16(), 0);
+ transmute(extract)
+}
+
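A tiny sketch of the new scalar extraction (editorial; `_mm512_set1_epi32` is assumed to be available alongside it):

```rust
use std::arch::x86_64::*;

// Hypothetical check: _mm512_cvtsi512_si32 returns element 0 of the vector.
#[target_feature(enable = "avx512f")]
unsafe fn first_lane_is_42() -> bool {
    let v = _mm512_set1_epi32(42);
    _mm512_cvtsi512_si32(v) == 42
}
```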
/// Broadcast the low packed 32-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastd_epi32&expand=545)
@@ -29901,6 +31753,66 @@ pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a:
vpmovdwmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
+ vpmovsdwmem(mem_addr as *mut i8, a.as_i32x16(), k);
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovsdwmem256(mem_addr as *mut i8, a.as_i32x8(), k);
+}
+
+/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdw))]
+pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovsdwmem128(mem_addr as *mut i8, a.as_i32x4(), k);
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
+ vpmovusdwmem(mem_addr as *mut i8, a.as_i32x16(), k);
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovusdwmem256(mem_addr as *mut i8, a.as_i32x8(), k);
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdw))]
+pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovusdwmem128(mem_addr as *mut i8, a.as_i32x4(), k);
+}
+
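A usage sketch for the saturating down-convert stores above (editorial, not part of the diff): only lanes whose mask bit is set are written, so partial stores leave the rest of the destination untouched. The helper name and mask are illustrative.

```rust
use std::arch::x86_64::*;

// Hypothetical helper: saturate sixteen i32 lanes to i16 but store only the
// low eight of them; out[8..] is never written.
#[target_feature(enable = "avx512f")]
unsafe fn store_low_half_saturated(out: &mut [i16; 16], a: __m512i) {
    _mm512_mask_cvtsepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0x00FF, a);
}
```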
/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
@@ -29931,6 +31843,66 @@ pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: _
vpmovdbmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
+ vpmovsdbmem(mem_addr as *mut i8, a.as_i32x16(), k);
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovsdbmem256(mem_addr as *mut i8, a.as_i32x8(), k);
+}
+
+/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsdb))]
+pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovsdbmem128(mem_addr as *mut i8, a.as_i32x4(), k);
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
+ vpmovusdbmem(mem_addr as *mut i8, a.as_i32x16(), k);
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovusdbmem256(mem_addr as *mut i8, a.as_i32x8(), k);
+}
+
+/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusdb))]
+pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovusdbmem128(mem_addr as *mut i8, a.as_i32x4(), k);
+}
+
/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
@@ -29961,6 +31933,66 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a:
vpmovqwmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
+ vpmovsqwmem(mem_addr as *mut i8, a.as_i64x8(), k);
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovsqwmem256(mem_addr as *mut i8, a.as_i64x4(), k);
+}
+
+/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqw))]
+pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovsqwmem128(mem_addr as *mut i8, a.as_i64x2(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
+ vpmovusqwmem(mem_addr as *mut i8, a.as_i64x8(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovusqwmem256(mem_addr as *mut i8, a.as_i64x4(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqw))]
+pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovusqwmem128(mem_addr as *mut i8, a.as_i64x2(), k);
+}
+
/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
@@ -29991,6 +32023,66 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: _
vpmovqbmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
+ vpmovsqbmem(mem_addr as *mut i8, a.as_i64x8(), k);
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovsqbmem256(mem_addr as *mut i8, a.as_i64x4(), k);
+}
+
+/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqb))]
+pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovsqbmem128(mem_addr as *mut i8, a.as_i64x2(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
+ vpmovusqbmem(mem_addr as *mut i8, a.as_i64x8(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovusqbmem256(mem_addr as *mut i8, a.as_i64x4(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqb))]
+pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovusqbmem128(mem_addr as *mut i8, a.as_i64x2(), k);
+}
+
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
@@ -30021,6 +32113,66 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a:
vpmovqdmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
+ vpmovsqdmem(mem_addr as *mut i8, a.as_i64x8(), k);
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovsqdmem256(mem_addr as *mut i8, a.as_i64x4(), k);
+}
+
+/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovsqd))]
+pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovsqdmem128(mem_addr as *mut i8, a.as_i64x2(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
+ vpmovusqdmem(mem_addr as *mut i8, a.as_i64x8(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
+ vpmovusqdmem256(mem_addr as *mut i8, a.as_i64x4(), k);
+}
+
+/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
+#[inline]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vpmovusqd))]
+pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+ vpmovusqdmem128(mem_addr as *mut i8, a.as_i64x2(), k);
+}
+
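The same masking behaviour applies to the 64-to-32-bit saturating stores; a final sketch (helper name illustrative, not part of the patch) shows a single-lane write that leaves the neighbouring element alone.

```rust
use std::arch::x86_64::*;

// Hypothetical helper: store only lane 0 of `a` as a u32 with unsigned
// saturation; out[1] is never touched.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn store_first_u64_as_u32(out: &mut [u32; 2], a: __m128i) {
    _mm_mask_cvtusepi64_storeu_epi32(out.as_mut_ptr() as *mut i8, 0b0000_0001, a);
}
```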
/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi32&expand=5628)
@@ -35275,8 +37427,8 @@ pub unsafe fn _mm_maskz_fixupimm_sd(
}
/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
-///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_ss&expand=2511)
#[inline]
#[target_feature(enable = "avx512f")]
@@ -35304,8 +37456,8 @@ pub unsafe fn _mm_fixupimm_round_ss(
}
/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
-///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_ss&expand=2512)
#[inline]
#[target_feature(enable = "avx512f")]
@@ -35334,8 +37486,8 @@ pub unsafe fn _mm_mask_fixupimm_round_ss(
}
/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
-///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_ss&expand=2513)
#[inline]
#[target_feature(enable = "avx512f")]
@@ -35364,8 +37516,8 @@ pub unsafe fn _mm_maskz_fixupimm_round_ss(
}
/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
-///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_sd&expand=2508)
#[inline]
#[target_feature(enable = "avx512f")]
@@ -35393,8 +37545,8 @@ pub unsafe fn _mm_fixupimm_round_sd(
}
/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
-///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_sd&expand=2509)
#[inline]
#[target_feature(enable = "avx512f")]
@@ -35423,8 +37575,8 @@ pub unsafe fn _mm_mask_fixupimm_round_sd(
}
/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
-///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_sd&expand=2510)
#[inline]
#[target_feature(enable = "avx512f")]
@@ -35588,13 +37740,12 @@ pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae:
transmute(r)
}
-/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
-///
-/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
-/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
-/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
-/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
-/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_ss&expand=1361)
@@ -35615,13 +37766,12 @@ pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128
transmute(r)
}
-/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
-///
-/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
-/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions
-/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions
-/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions
-/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions
+/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundsd_ss&expand=1362)
@@ -35649,7 +37799,6 @@ pub unsafe fn _mm_mask_cvt_roundsd_ss(
}
/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35681,7 +37830,6 @@ pub unsafe fn _mm_maskz_cvt_roundsd_ss(
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35706,7 +37854,6 @@ pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 {
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35731,7 +37878,6 @@ pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 {
}
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35776,7 +37922,6 @@ pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
}
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35801,7 +37946,6 @@ pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 {
}
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35826,7 +37970,6 @@ pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 {
}
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -35921,7 +38064,6 @@ pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
}
/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
-///
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
@@ -36028,7 +38170,7 @@ pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 {
/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_i32&expand=2022)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i32&expand=2022)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
@@ -36038,7 +38180,7 @@ pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_u32&expand=2026)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u32&expand=2026)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
@@ -36049,7 +38191,7 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_si32&expand=1930)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si32&expand=1930)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
@@ -36068,7 +38210,7 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 {
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_i32&expand=1928)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i32&expand=1928)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
@@ -36105,7 +38247,7 @@ pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 {
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_i32&expand=2015)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i32&expand=2015)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
@@ -36115,7 +38257,7 @@ pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_u32&expand=2020)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u32&expand=2020)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
@@ -36125,7 +38267,7 @@ pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_ss&expand=2032)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_ss&expand=2032)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
@@ -36137,7 +38279,7 @@ pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_sd&expand=2031)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sd&expand=2031)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
@@ -36147,34 +38289,10 @@ pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
transmute(r)
}
-/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_ss&expand=2035)
-#[inline]
-#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss
-pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
- let b = b as f32;
- let r = simd_insert(a, 0, b);
- transmute(r)
-}
-
-/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
-///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_sd&expand=2034)
-#[inline]
-#[target_feature(enable = "avx512f")]
-#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd
-pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
- let b = b as f64;
- let r = simd_insert(a, 0, b);
- transmute(r)
-}
-
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_ss&expand=1175)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_ss&expand=1175)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] //should be vcomiss
@@ -36194,7 +38312,7 @@ pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> i3
/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
-/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_sd&expand=1174)
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sd&expand=1174)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] //should be vcomisd
@@ -36723,16 +38841,29 @@ extern "C" {
#[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
+
#[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
+ #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
+ fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
+ #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
+ fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
+
#[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
+
#[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
+
#[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
+ #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
+ fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
+ #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
+ fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
+
#[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
@@ -36750,12 +38881,31 @@ extern "C" {
#[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
+ #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
+ fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
+ #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
+ fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
+
#[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
- fn vcvttps2udq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> u32x16;
+ fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
+ #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
+ fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
+ #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
+ fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
+
#[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
+ #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
+ fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
+ #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
+ fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
+
#[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
+ #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
+ fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
+ #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
+ fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
@@ -36782,6 +38932,20 @@ extern "C" {
#[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
+ fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
+ fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
+ fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
+ fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
+ fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
+ fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
+
#[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
@@ -36789,18 +38953,62 @@ extern "C" {
#[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
+ fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
+ fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
+ fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
+ fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
+ fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
+ fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
+
#[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
+ fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
+ fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
+ fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
+ fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
+ fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
+ fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
#[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
+ fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
+ fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
+ fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
+ fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
+ fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
+ fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
#[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
@@ -36808,29 +39016,92 @@ extern "C" {
#[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
+ fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
+ fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
+ fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
+ fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
+ fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
+ fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
+
#[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
+ fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
+ fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
+
#[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
+ fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
+ fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
+
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
+ fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
+ fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
+
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
+ fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
+ fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
+
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
+ fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
+ fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
+
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
+ fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
+ fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
+
#[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
+ fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
+ fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
+
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
+ fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
+ fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
+
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
+ fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
+ fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
+
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
+ fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
+ #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
+ fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.gather.dpd.512"]
fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
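The `pmovs.*`/`pmovus.*` names added above are the signed- and unsigned-saturating variants of the plain truncating `pmov.*` down-converts, and the `.mem` forms write only the mask-selected lanes to memory. A scalar sketch of the signed-saturating i32 to i16 masked store (illustrative only; the helper name is made up):

```rust
// Illustrative scalar model of a "pmovs.dw.mem"-style masked store: each
// selected i32 lane is saturated to the i16 range and written out; lanes whose
// mask bit is clear leave the destination memory untouched.
fn masked_saturating_store_i32_to_i16(dst: &mut [i16], src: &[i32], mask: u8) {
    for (i, &v) in src.iter().enumerate() {
        if (mask >> i) & 1 == 1 {
            dst[i] = v.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
        }
    }
}

fn main() {
    let src = [70_000, -70_000, 5, 6];
    let mut dst = [0_i16; 4];
    masked_saturating_store_i32_to_i16(&mut dst, &src, 0b0111);
    assert_eq!(dst, [i16::MAX, i16::MIN, 5, 0]); // lane 3 left as it was
}
```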
@@ -37208,31 +39479,21 @@ extern "C" {
#[link_name = "llvm.x86.avx512.vcvtss2si32"]
fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
- #[link_name = "llvm.x86.avx512.vcvtss2si64"]
- fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
#[link_name = "llvm.x86.avx512.vcvtss2usi32"]
fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
- #[link_name = "llvm.x86.avx512.vcvtss2usi64"]
- fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
+
#[link_name = "llvm.x86.avx512.vcvtsd2si32"]
fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
- #[link_name = "llvm.x86.avx512.vcvtsd2si64"]
- fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
#[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
- #[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
- fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
#[link_name = "llvm.x86.avx512.cvtsi2ss32"]
fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
- #[link_name = "llvm.x86.avx512.cvtsi2ss64"]
- fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtsi2sd64"]
fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
+
#[link_name = "llvm.x86.avx512.cvtusi2ss"]
fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
- #[link_name = "llvm.x86.avx512.cvtusi642ss"]
- fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtusi642sd"]
fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
@@ -40778,23 +43039,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
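A note on the -0.99999994 constants used throughout these fused multiply-add tests: with a = 0.00000007, b = 1. and c = -1., the exact a*b + c is about -0.99999993, which is not representable in f32 and rounds to -(1 - 2^-24), printed as -0.99999994. A standalone check with plain `f32` arithmetic, no intrinsics needed:

```rust
fn main() {
    let (a, b, c) = (0.00000007_f32, 1.0_f32, -1.0_f32);
    // The fused a*b + c is about -0.99999993, whose nearest f32 is -(1 - 2^-24),
    // the same value the -0.99999994 literals in the expected vectors parse to.
    assert_eq!(a.mul_add(b, c), -0.99999994);
    assert_eq!(-0.99999994_f32, -(1.0 - f32::EPSILON / 2.0));
}
```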
@@ -40807,6 +43057,7 @@ mod tests {
let r =
_mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
+ #[rustfmt::skip]
let r = _mm512_maskz_fmadd_round_ps(
0b00000000_11111111,
a,
@@ -40814,23 +43065,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ 0., 0., 0., 0.,
+ 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
@@ -40850,23 +43090,12 @@ mod tests {
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -1., -1., -1., -1.,
+ -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
@@ -40899,23 +43128,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
@@ -40935,23 +43153,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ 0., 0., 0., 0.,
+ 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
@@ -40971,23 +43178,12 @@ mod tests {
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- -0.99999994,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ -0.99999994, -0.99999994, -0.99999994, -0.99999994,
+ 1., 1., 1., 1.,
+ 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
@@ -40998,23 +43194,12 @@ mod tests {
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
@@ -41045,23 +43230,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
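Likewise for the fmaddsub expected vectors: even-indexed lanes compute a*b - c and odd-indexed lanes a*b + c (fmsubadd, further down, is the mirror image), which produces the alternating 1.0000001 / -0.99999994 pattern. A scalar sketch (illustrative only):

```rust
// Even-indexed lanes compute a*b - c, odd-indexed lanes a*b + c; with
// a = 7e-8, b = 1.0 and c = -1.0 that alternates 1.0000001 / -0.99999994.
fn fmaddsub_lane(i: usize, a: f32, b: f32, c: f32) -> f32 {
    if i % 2 == 0 { a * b - c } else { a * b + c }
}

fn main() {
    let (a, b, c) = (0.00000007_f32, 1.0_f32, -1.0_f32);
    assert_eq!(fmaddsub_lane(0, a, b, c), 1.0000001);
    assert_eq!(fmaddsub_lane(1, a, b, c), -0.99999994);
}
```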
@@ -41086,23 +43260,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 0., 0., 0., 0.,
+ 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
@@ -41127,23 +43290,12 @@ mod tests {
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ 1.0000001, -0.99999994, 1.0000001, -0.99999994,
+ -1., -1., -1., -1.,
+ -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
@@ -41154,23 +43306,12 @@ mod tests {
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
);
assert_eq_m512(r, e);
let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
@@ -41201,23 +43342,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
- 0.00000007,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
+ 0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
@@ -41242,23 +43372,12 @@ mod tests {
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ 0., 0., 0., 0.,
+ 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
@@ -41283,23 +43402,12 @@ mod tests {
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -0.99999994,
- 1.0000001,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
- -1.,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -0.99999994, 1.0000001, -0.99999994, 1.0000001,
+ -1., -1., -1., -1.,
+ -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
@@ -41688,23 +43796,12 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fixupimm_round_ps() {
+ #[rustfmt::skip]
let a = _mm512_set_ps(
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN,
+ 1., 1., 1., 1.,
+ 1., 1., 1., 1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
@@ -41724,23 +43821,12 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fixupimm_round_ps() {
+ #[rustfmt::skip]
let a = _mm512_set_ps(
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- f32::NAN,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
- 1.,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN,
+ f32::NAN, f32::NAN, f32::NAN, f32::NAN,
+ 1., 1., 1., 1.,
+ 1., 1., 1., 1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
@@ -41856,6 +43942,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtps_epi32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm256_mask_cvtps_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
+ let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtps_epi32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let r = _mm256_maskz_cvtps_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
+ let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtps_epi32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtps_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(12, 14, 14, 16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtps_epi32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let r = _mm_maskz_cvtps_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtps_epi32(0b00001111, a);
+ let e = _mm_set_epi32(12, 14, 14, 16);
+ assert_eq_m128i(r, e);
+ }
+
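The expected values in these masked `cvtps_epi32`/`cvtps_epu32` tests follow the default round-to-nearest (ties to even) mode, so 9.5 converts to 10, while the `cvttps_*` tests further down truncate instead. A quick standalone check (assumes `f32::round_ties_even`, available in std since Rust 1.77):

```rust
fn main() {
    // Default MXCSR rounding is round-to-nearest, ties to even: 9.5 -> 10, 8.5 -> 8.
    assert_eq!(9.5_f32.round_ties_even(), 10.0);
    assert_eq!(8.5_f32.round_ties_even(), 8.0);
    // The cvtt* ("truncate") tests further down chop instead: 9.5 -> 9.
    assert_eq!(9.5_f32.trunc(), 9.0);
}
```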
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_epu32() {
let a = _mm512_setr_ps(
@@ -41891,6 +44019,64 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtps_epu32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let r = _mm256_cvtps_epu32(a);
+ let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtps_epu32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm256_mask_cvtps_epu32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
+ let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtps_epu32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let r = _mm256_maskz_cvtps_epu32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
+ let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtps_epu32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let r = _mm_cvtps_epu32(a);
+ let e = _mm_set_epi32(12, 14, 14, 16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtps_epu32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtps_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
+ let e = _mm_set_epi32(12, 14, 14, 16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtps_epu32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let r = _mm_maskz_cvtps_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtps_epu32(0b00001111, a);
+ let e = _mm_set_epi32(12, 14, 14, 16);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -41991,6 +44177,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu8_epi32() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi32(-1);
+ let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu8_epi32() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu8_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu8_epi32() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi32(-1);
+ let r = _mm_mask_cvtepu8_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu8_epi32() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu8_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -42091,6 +44319,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu16_epi32() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi32(-1);
+ let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu16_epi32() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu16_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu16_epi32() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi32(-1);
+ let r = _mm_mask_cvtepu16_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu16_epi32() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu16_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -42379,285 +44649,294 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi32_epi16() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_cvtsepi32_epi16(a);
+ #[rustfmt::skip]
let e = _mm256_set_epi16(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i16::MIN,
- i16::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i16::MIN, i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_epi16() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MAX,
);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
+ #[rustfmt::skip]
let e = _mm256_set_epi16(
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i16::MIN,
- i16::MAX,
+ -1, -1, -1, -1,
+ -1, -1, -1, -1,
+ 8, 9, 10, 11,
+ 12, 13, i16::MIN, i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_maskz_cvtsepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
+ #[rustfmt::skip]
let e = _mm256_set_epi16(
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i16::MIN,
- i16::MAX,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 8, 9, 10, 11,
+ 12, 13, i16::MIN, i16::MAX,
);
assert_eq_m256i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi32_epi16() {
+ let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm256_cvtsepi32_epi16(a);
+ let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi32_epi16() {
+ let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+ let src = _mm_set1_epi16(-1);
+ let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
+ let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
+ let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm256_maskz_cvtsepi32_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
+ let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi32_epi16() {
+ let a = _mm_set_epi32(4, 5, 6, 7);
+ let r = _mm_cvtsepi32_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi32_epi16() {
+ let a = _mm_set_epi32(4, 5, 6, 7);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi32_epi16() {
+ let a = _mm_set_epi32(4, 5, 6, 7);
+ let r = _mm_maskz_cvtsepi32_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi32_epi8() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_cvtsepi32_epi8(a);
+ #[rustfmt::skip]
let e = _mm_set_epi8(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i8::MIN,
- i8::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i8::MIN, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_epi8() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MAX,
);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
+ #[rustfmt::skip]
let e = _mm_set_epi8(
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- -1,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i8::MIN,
- i8::MAX,
+ -1, -1, -1, -1,
+ -1, -1, -1, -1,
+ 8, 9, 10, 11,
+ 12, 13, i8::MIN, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MAX,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_maskz_cvtsepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
+ #[rustfmt::skip]
let e = _mm_set_epi8(
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i8::MIN,
- i8::MAX,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 8, 9, 10, 11,
+ 12, 13, i8::MIN, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi32_epi8() {
+ let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm256_cvtsepi32_epi8(a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi32_epi8() {
+ let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
+ let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm256_maskz_cvtsepi32_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 9, 10, 11, 12,
+ 13, 14, 15, 16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi32_epi8() {
+ let a = _mm_set_epi32(13, 14, 15, 16);
+ let r = _mm_cvtsepi32_epi8(a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 13, 14, 15, 16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi32_epi8() {
+ let a = _mm_set_epi32(13, 14, 15, 16);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 13, 14, 15, 16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi32_epi8() {
+ let a = _mm_set_epi32(13, 14, 15, 16);
+ let r = _mm_maskz_cvtsepi32_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi32_epi16() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MIN,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_cvtusepi32_epi16(a);
let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
@@ -42666,23 +44945,12 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_epi16() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MIN,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MIN,
);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
@@ -42694,23 +44962,12 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MIN,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_maskz_cvtusepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
@@ -42719,25 +44976,72 @@ mod tests {
assert_eq_m256i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi32_epi16() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_cvtusepi32_epi16(a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi32_epi16() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_maskz_cvtusepi32_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
+ let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi32_epi16() {
+ let a = _mm_set_epi32(5, 6, 7, 8);
+ let r = _mm_cvtusepi32_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi32_epi16() {
+ let a = _mm_set_epi32(5, 6, 7, 8);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi32_epi16() {
+ let a = _mm_set_epi32(5, 6, 7, 8);
+ let r = _mm_maskz_cvtusepi32_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
+ assert_eq_m128i(r, e);
+ }
+
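The -1 lanes in the `cvtusepi32_*` expected vectors come from unsigned saturation: i32::MIN is treated as the unsigned value 0x8000_0000, clamps to u16::MAX (or u8::MAX for the epi8 variants), and reads back as -1 when the packed result is inspected as signed lanes. A standalone check of that chain:

```rust
fn main() {
    // The source lane is treated as unsigned, so i32::MIN (0x8000_0000) is a
    // huge positive value; it clamps to u16::MAX, which reads back as -1
    // when the packed result is inspected as signed 16-bit lanes.
    let as_unsigned = i32::MIN as u32;
    let saturated = as_unsigned.min(u16::MAX as u32) as u16;
    assert_eq!(saturated, u16::MAX);
    assert_eq!(saturated as i16, -1);
}
```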
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi32_epi8() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MIN,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_cvtusepi32_epi8(a);
let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
@@ -42746,23 +45050,12 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_epi8() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MIN,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MIN,
);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
@@ -42774,23 +45067,12 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
+ #[rustfmt::skip]
let a = _mm512_set_epi32(
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- i32::MIN,
- i32::MIN,
+ 0, 1, 2, 3,
+ 4, 5, 6, 7,
+ 8, 9, 10, 11,
+ 12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_maskz_cvtusepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
@@ -42799,6 +45081,64 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi32_epi8() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
+ let r = _mm256_cvtusepi32_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi32_epi8() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
+ let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
+ let r = _mm256_maskz_cvtusepi32_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi32_epi8() {
+ let a = _mm_set_epi32(5, 6, 7, i32::MAX);
+ let r = _mm_cvtusepi32_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi32_epi8() {
+ let a = _mm_set_epi32(5, 6, 7, i32::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi32_epi8() {
+ let a = _mm_set_epi32(5, 6, 7, i32::MAX);
+ let r = _mm_maskz_cvtusepi32_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
@@ -42944,23 +45284,12 @@ mod tests {
unsafe fn test_mm512_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 0.,
- 4294967300.,
- 2.,
- 4294967300.,
- 4.,
- 4294967300.,
- 6.,
- 4294967300.,
- 8.,
- 10.,
- 10.,
- 12.,
- 12.,
- 14.,
- 14.,
- 16.,
+ 0., 4294967300., 2., 4294967300.,
+ 4., 4294967300., 6., 4294967300.,
+ 8., 10., 10., 12.,
+ 12., 14., 14., 16.,
);
assert_eq_m512(r, e);
}
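A note on the 4294967300. entries above: the input lanes set to -2, -4, … are reinterpreted as unsigned (4294967294, …), and both the conversion and the 4294967300. literal round to the nearest f32, which is 2^32. A standalone check:

```rust
fn main() {
    // -2i32 reinterpreted as unsigned is 4294967294; converting that to f32
    // rounds to 2^32, which is also what the 4294967300. literal parses to.
    let x = -2_i32 as u32;
    assert_eq!(x, 4294967294);
    assert_eq!(x as f32, 4294967296.0);
    assert_eq!(x as f32, 4294967300.);
}
```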
@@ -42978,23 +45307,12 @@ mod tests {
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 0.,
- 4294967300.,
- 2.,
- 4294967300.,
- 4.,
- 4294967300.,
- 6.,
- 4294967300.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
+ 0., 4294967300., 2., 4294967300.,
+ 4., 4294967300., 6., 4294967300.,
+ 0., 0., 0., 0.,
+ 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
@@ -43009,23 +45327,12 @@ mod tests {
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
+ #[rustfmt::skip]
let e = _mm512_setr_ps(
- 0.,
- 4294967300.,
- 2.,
- 4294967300.,
- 4.,
- 4294967300.,
- 6.,
- 4294967300.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
- 0.,
+ 0., 4294967300., 2., 4294967300.,
+ 4., 4294967300., 6., 4294967300.,
+ 0., 0., 0., 0.,
+ 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
@@ -43140,6 +45447,48 @@ mod tests {
assert_eq_m256i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtps_ph() {
+ let a = _mm256_set1_ps(1.);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtps_ph(src, 0b11111111, a, _MM_FROUND_NO_EXC);
+ let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtps_ph() {
+ let a = _mm256_set1_ps(1.);
+ let r = _mm256_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtps_ph(0b11111111, a, _MM_FROUND_NO_EXC);
+ let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtps_ph() {
+ let a = _mm_set1_ps(1.);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtps_ph(src, 0b00001111, a, _MM_FROUND_NO_EXC);
+ let e = _mm_setr_epi64x(4323521613979991040, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtps_ph() {
+ let a = _mm_set1_ps(1.);
+ let r = _mm_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtps_ph(0b00001111, a, _MM_FROUND_NO_EXC);
+ let e = _mm_setr_epi64x(4323521613979991040, 0);
+ assert_eq_m128i(r, e);
+ }
+
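The 4323521613979991040 constants in these cvtps_ph/cvtph_ps tests are four packed half-precision 1.0 values: 0x3C00 is the IEEE 754 binary16 encoding of 1.0, repeated in every 16-bit lane of the 64-bit element. A standalone check:

```rust
fn main() {
    // 0x3C00 is the IEEE 754 binary16 (half-precision) encoding of 1.0;
    // four copies packed into 64 bits give the constant used by these tests.
    const HALF_ONE: u64 = 0x3C00;
    let packed = HALF_ONE | HALF_ONE << 16 | HALF_ONE << 32 | HALF_ONE << 48;
    assert_eq!(packed, 4323521613979991040);
}
```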
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
@@ -43236,6 +45585,48 @@ mod tests {
assert_eq_m512(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtph_ps() {
+ let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
+ let src = _mm256_set1_ps(0.);
+ let r = _mm256_mask_cvtph_ps(src, 0, a);
+ assert_eq_m256(r, src);
+ let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
+ let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtph_ps() {
+ let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
+ let r = _mm256_maskz_cvtph_ps(0, a);
+ assert_eq_m256(r, _mm256_setzero_ps());
+ let r = _mm256_maskz_cvtph_ps(0b11111111, a);
+ let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtph_ps() {
+ let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
+ let src = _mm_set1_ps(0.);
+ let r = _mm_mask_cvtph_ps(src, 0, a);
+ assert_eq_m128(r, src);
+ let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
+ let e = _mm_setr_ps(1., 1., 1., 1.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtph_ps() {
+ let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
+ let r = _mm_maskz_cvtph_ps(0, a);
+ assert_eq_m128(r, _mm_setzero_ps());
+ let r = _mm_maskz_cvtph_ps(0b00001111, a);
+ let e = _mm_setr_ps(1., 1., 1., 1.);
+ assert_eq_m128(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
@@ -43341,6 +45732,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvttps_epi32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm256_mask_cvttps_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
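+ // cvtt* converts with truncation toward zero, so 9.5 -> 9, 11.5 -> 11, 13.5 -> 13, 15.5 -> 15.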
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttps_epi32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let r = _mm256_maskz_cvttps_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvttps_epi32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvttps_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvttps_epi32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let r = _mm_maskz_cvttps_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvttps_epi32(0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvttps_epu32() {
let a = _mm512_setr_ps(
@@ -43376,6 +45809,64 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvttps_epu32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let r = _mm256_cvttps_epu32(a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvttps_epu32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm256_mask_cvttps_epu32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttps_epu32() {
+ let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
+ let r = _mm256_maskz_cvttps_epu32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvttps_epu32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let r = _mm_cvttps_epu32(a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvttps_epu32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvttps_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvttps_epu32() {
+ let a = _mm_set_ps(12., 13.5, 14., 15.5);
+ let r = _mm_maskz_cvttps_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvttps_epu32(0b00001111, a);
+ let e = _mm_set_epi32(12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32gather_ps() {
let mut arr = [0f32; 256];
@@ -49496,6 +51987,69 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
+ let a = _mm512_set1_epi32(i32::MAX);
+ let mut r = _mm256_undefined_si256();
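+ // The signed-saturating narrowing clamps i32::MAX to i16::MAX in every selected lane.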
+ _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+ let e = _mm256_set1_epi16(i16::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
+ let a = _mm256_set1_epi32(i32::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
+ let a = _mm_set1_epi32(i32::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
+ let a = _mm512_set1_epi32(i32::MAX);
+ let mut r = _mm256_undefined_si256();
+ _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+ let e = _mm256_set1_epi16(u16::MAX as i16);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
+ let a = _mm256_set1_epi32(i32::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
+ let a = _mm_set1_epi32(i32::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi16(
+ 0, 0, 0, 0,
+ u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16,
+ );

+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
let a = _mm512_set1_epi32(9);
@@ -49523,6 +52077,84 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
+ let a = _mm512_set1_epi32(i32::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+ let e = _mm_set1_epi8(i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
+ let a = _mm256_set1_epi32(i32::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
+ let a = _mm_set1_epi32(i32::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
+ let a = _mm512_set1_epi32(i32::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+ let e = _mm_set1_epi8(u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
+ let a = _mm256_set1_epi32(i32::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
+ let a = _mm_set1_epi32(i32::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_epi32() {
let a = _mm512_set1_epi32(9);
@@ -52720,24 +55352,6 @@ mod tests {
assert_eq_m128d(r, e);
}
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm_cvtu64_ss() {
- let a = _mm_set_ps(0., -0.5, 1., -1.5);
- let b: u64 = 9;
- let r = _mm_cvtu64_ss(a, b);
- let e = _mm_set_ps(0., -0.5, 1., 9.);
- assert_eq_m128(r, e);
- }
-
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm_cvtu64_sd() {
- let a = _mm_set_pd(1., -1.5);
- let b: u64 = 9;
- let r = _mm_cvtu64_sd(a, b);
- let e = _mm_set_pd(1., 9.);
- assert_eq_m128d(r, e);
- }
-
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_ss() {
let a = _mm_set1_ps(2.2);
@@ -52755,4 +55369,12 @@ mod tests {
let e: i32 = 0;
assert_eq!(r, e);
}
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtsi512_si32() {
+ let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_cvtsi512_si32(a);
+ let e: i32 = 1;
+ assert_eq!(r, e);
+ }
}
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 29854d858b..cf1b4b6220 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -1,8 +1,561 @@
-//use crate::{
-//
-// core_arch::{simd::*, simd_llvm::*, x86::*},
-// mem::transmute,
-//};
+use crate::{
+ core_arch::{simd::*, simd_llvm::*, x86::*, x86_64::*},
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i64&expand=1792)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si))]
+pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
+ _mm_cvtsd_si64(a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i64&expand=1894)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si))]
+pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
+ _mm_cvtss_si64(a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u64&expand=1902)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi))]
+pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
+ transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u64&expand=1800)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi))]
+pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
+ transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
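+// A minimal usage sketch for the scalar conversions above; `_example_lower_sd_to_u64`
+// is a hypothetical helper, not an intrinsic. The conversion reads only the lowest
+// lane and rounds according to the current MXCSR direction.
+#[target_feature(enable = "avx512f")]
+unsafe fn _example_lower_sd_to_u64(a: __m128d) -> u64 {
+ // With the default round-to-nearest mode, a low lane of 2.5 converts to 2.
+ _mm_cvtsd_u64(a)
+}
+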
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_ss&expand=1643)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss))]
+pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
+ let b = b as f32;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sd&expand=1644)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd))]
+pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
+ let b = b as f64;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss
+pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
+ let b = b as f32;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd
+pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
+ let b = b as f64;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i64&expand=2016)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si))]
+pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
+ transmute(vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u64&expand=2021)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi))]
+pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
+ transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i64&expand=2023)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si))]
+pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
+ transmute(vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u64&expand=2027)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi))]
+pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
+ transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d {
+ let a = a.as_f64x2();
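+ // constify_imm4_round! turns the runtime `rounding` value into the constant immediate the LLVM intrinsic requires.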
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsi2sd64(a, b, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
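+// A minimal usage sketch; `_example_cvt_roundi64_sd` is a hypothetical helper, not
+// an intrinsic. The rounding argument must be a constant expression built from the
+// combinations listed in the doc comment above.
+#[target_feature(enable = "avx512f")]
+unsafe fn _example_cvt_roundi64_sd(a: __m128d, b: i64) -> __m128d {
+ // Round the integer toward negative infinity and suppress exceptions.
+ _mm_cvt_roundi64_sd(a, b, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
+}
+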
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsi2sd64(a, b, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsi2ss64(a, b, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2sd, rounding = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64, rounding: i32) -> __m128d {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtusi2sd64(a, b, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsi2ss64(a, b, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64, rounding: i32) -> __m128 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtusi2ss64(a, b, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d, rounding: i32) -> i64 {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsd2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d, rounding: i32) -> i64 {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsd2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d, rounding: i32) -> u64 {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsd2usi64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvt_roundss_si64(a: __m128, rounding: i32) -> i64 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtss2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvt_roundss_i64(a: __m128, rounding: i32) -> i64 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtss2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvt_roundss_u64(a: __m128, rounding: i32) -> u64 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtss2usi64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_round!(rounding, call);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d, sae: i32) -> i64 {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsd2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_sae!(sae, call);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d, sae: i32) -> i64 {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsd2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_sae!(sae, call);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d, sae: i32) -> u64 {
+ let a = a.as_f64x2();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtsd2usi64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_sae!(sae, call);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvtt_roundss_i64(a: __m128, sae: i32) -> i64 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtss2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_sae!(sae, call);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvtt_roundss_si64(a: __m128, sae: i32) -> i64 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtss2si64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_sae!(sae, call);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))]
+#[rustc_args_required_const(1)]
+pub unsafe fn _mm_cvtt_roundss_u64(a: __m128, sae: i32) -> u64 {
+ let a = a.as_f32x4();
+ macro_rules! call {
+ ($imm4:expr) => {
+ vcvtss2usi64(a, $imm4)
+ };
+ }
+ let r = constify_imm4_sae!(sae, call);
+ transmute(r)
+}
+
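+// A minimal usage sketch; `_example_cvtt_roundsd_u64` is a hypothetical helper, not
+// an intrinsic. Passing _MM_FROUND_NO_EXC suppresses floating-point exceptions while
+// the value is truncated toward zero.
+#[target_feature(enable = "avx512f")]
+unsafe fn _example_cvtt_roundsd_u64(a: __m128d) -> u64 {
+ _mm_cvtt_roundsd_u64(a, _MM_FROUND_NO_EXC)
+}
+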
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512.vcvtss2si64"]
+ fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
+ #[link_name = "llvm.x86.avx512.vcvtss2usi64"]
+ fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
+ #[link_name = "llvm.x86.avx512.vcvtsd2si64"]
+ fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
+ #[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
+ fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
+
+ #[link_name = "llvm.x86.avx512.cvtsi2ss64"]
+ fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
+ #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
+ fn vcvtsi2sd64(a: f64x2, b: i64, rounding: i32) -> f64x2;
+ #[link_name = "llvm.x86.avx512.cvtusi642ss"]
+ fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
+ #[link_name = "llvm.x86.avx512.cvtusi642sd"]
+ fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2;
+}
#[cfg(test)]
mod tests {
@@ -2901,6 +3454,206 @@ mod tests {
assert_eq_m256(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_ps() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let src = _mm_set1_ps(0.);
+ let r = _mm256_mask_cvtpd_ps(src, 0, a);
+ assert_eq_m128(r, src);
+ let r = _mm256_mask_cvtpd_ps(src, 0b00001111, a);
+ let e = _mm_set_ps(4., -5.5, 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_ps() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let r = _mm256_maskz_cvtpd_ps(0, a);
+ assert_eq_m128(r, _mm_setzero_ps());
+ let r = _mm256_maskz_cvtpd_ps(0b00001111, a);
+ let e = _mm_set_ps(4., -5.5, 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtpd_ps() {
+ let a = _mm_set_pd(6., -7.5);
+ let src = _mm_set1_ps(0.);
+ let r = _mm_mask_cvtpd_ps(src, 0, a);
+ assert_eq_m128(r, src);
+ let r = _mm_mask_cvtpd_ps(src, 0b00000011, a);
+ let e = _mm_set_ps(0., 0., 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_ps() {
+ let a = _mm_set_pd(6., -7.5);
+ let r = _mm_maskz_cvtpd_ps(0, a);
+ assert_eq_m128(r, _mm_setzero_ps());
+ let r = _mm_maskz_cvtpd_ps(0b00000011, a);
+ let e = _mm_set_ps(0., 0., 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtpd_epi32(a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvtpd_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtpd_epi32(src, 0b11111111, a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvtpd_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtpd_epi32(0b11111111, a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_epi32() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtpd_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, -6, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_epi32() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let r = _mm256_maskz_cvtpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtpd_epi32(0b00001111, a);
+ let e = _mm_set_epi32(4, -6, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtpd_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let r = _mm_maskz_cvtpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtpd_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpd_epu32() {
+ let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
+ let r = _mm512_cvtpd_epu32(a);
+ let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpd_epu32() {
+ let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvtpd_epu32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtpd_epu32(src, 0b11111111, a);
+ let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtpd_epu32() {
+ let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
+ let r = _mm512_maskz_cvtpd_epu32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtpd_epu32(0b11111111, a);
+ let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_cvtpd_epu32(a);
+ let e = _mm_set_epi32(4, 6, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtpd_epu32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 6, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_maskz_cvtpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtpd_epu32(0b00001111, a);
+ let e = _mm_set_epi32(4, 6, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_cvtpd_epu32(a);
+ let e = _mm_set_epi32(0, 0, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtpd_epu32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_maskz_cvtpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtpd_epu32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtpd_pslo() {
let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -2953,6 +3706,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepi8_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepi8_epi64(src, 0b00001111, a);
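+ // Mask bits 0..3 select all four i64 lanes, which sign-extend the four lowest bytes of a (15, 14, 13, 12).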
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepi8_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepi8_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepi8_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi8_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepi8_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi8_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu8_epi64() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -2982,26 +3777,68 @@ mod tests {
assert_eq_m512i(r, e);
}
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_cvtepi16_epi64() {
- let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
- let r = _mm512_cvtepi16_epi64(a);
- let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
- assert_eq_m512i(r, e);
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepu8_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu8_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
}
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_mask_cvtepi16_epi64() {
- let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
- let src = _mm512_set1_epi64(-1);
- let r = _mm512_mask_cvtepi16_epi64(src, 0, a);
- assert_eq_m512i(r, src);
- let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a);
- let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
- assert_eq_m512i(r, e);
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu8_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu8_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
}
- #[simd_test(enable = "avx512f")]
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepu8_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu8_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu8_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu8_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi16_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepi16_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi16_epi64() {
let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi16_epi64(0, a);
@@ -3082,6 +3919,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepu16_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu16_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu16_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu16_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepu16_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu16_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu16_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu16_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_epi64() {
let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
@@ -3182,6 +4061,48 @@ mod tests {
assert_eq_m512i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepu32_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu32_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu32_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu32_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepu32_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu32_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu32_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu32_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_pd() {
let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
@@ -3282,6 +4203,64 @@ mod tests {
assert_eq_m512d(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_cvtepu32_pd(a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm256_set1_pd(-1.);
+ let r = _mm256_mask_cvtepu32_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_cvtepu32_pd(src, 0b00001111, a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu32_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_cvtepu32_pd(0b00001111, a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_cvtepu32_pd(a);
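+ // Only the two lowest 32-bit lanes (15 and 14) are widened to f64.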
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm_set1_pd(-1.);
+ let r = _mm_mask_cvtepu32_pd(src, 0, a);
+ assert_eq_m128d(r, src);
+ let r = _mm_mask_cvtepu32_pd(src, 0b00000011, a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu32_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_cvtepu32_pd(0b00000011, a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32lo_pd() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -3610,6 +4589,64 @@ mod tests {
assert_eq_m256i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_cvtsepi64_epi32(a);
+ let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi32(-1);
+ let r = _mm256_mask_cvtsepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi64_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_maskz_cvtsepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi64_epi32(0b00001111, a);
+ let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi64_epi32() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_cvtsepi64_epi32(a);
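+ // The saturating narrow clamps to i32::MIN / i32::MAX and zeroes the upper half of the destination.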
+ let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_epi32() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtsepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi64_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi64_epi32() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_maskz_cvtsepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi64_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi64_epi16() {
let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
@@ -3639,6 +4676,64 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_cvtsepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtsepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_maskz_cvtsepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi64_epi16() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_cvtsepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_epi16() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtsepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi64_epi16(src, 0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi64_epi16() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_maskz_cvtsepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi64_epi16(0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi64_epi8() {
let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
@@ -3654,23 +4749,12 @@ mod tests {
let r = _mm512_mask_cvtsepi64_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtsepi64_epi8(src, 0b00001111, a);
+ #[rustfmt::skip]
let e = _mm_set_epi8(
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- -1,
- -1,
- -1,
- -1,
- 4,
- 5,
- i8::MIN,
- i8::MAX,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ -1, -1, -1, -1,
+ 4, 5, i8::MIN, i8::MAX,
);
assert_eq_m128i(r, e);
}
@@ -3685,6 +4769,64 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_cvtsepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtsepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_maskz_cvtsepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi64_epi8() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_cvtsepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_epi8() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtsepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi64_epi8(src, 0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi64_epi8() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_maskz_cvtsepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi64_epi8(0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi64_epi32() {
let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
@@ -3714,6 +4856,64 @@ mod tests {
assert_eq_m256i(r, e);
}
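+ // Unsigned saturation (`cvtusepi64_epi32`): lanes are treated as unsigned and
+ // clamped to u32::MAX, which is why i64::MAX maps to u32::MAX below.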
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_cvtusepi64_epi32(a);
+ let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtusepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi64_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_maskz_cvtusepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi64_epi32(0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi64_epi32() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_cvtusepi64_epi32(a);
+ let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_epi32() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtusepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi64_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi64_epi32() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_maskz_cvtusepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi64_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi64_epi16() {
let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
@@ -3743,6 +4943,64 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_cvtusepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtusepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_maskz_cvtusepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi64_epi16() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_cvtusepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_epi16() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtusepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi64_epi16(src, 0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi64_epi16() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_maskz_cvtusepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi64_epi16(0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi64_epi8() {
let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
@@ -3772,6 +5030,64 @@ mod tests {
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_cvtusepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtusepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_maskz_cvtusepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi64_epi8() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_cvtusepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_epi8() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtusepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi64_epi8(src, 0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi64_epi8() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_maskz_cvtusepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi64_epi8(0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtt_roundpd_epi32() {
let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -3859,6 +5175,48 @@ mod tests {
assert_eq_m256i(r, e);
}
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvttpd_epi32() {
+ let a = _mm256_setr_pd(4., -5.5, 6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvttpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvttpd_epi32(src, 0b00001111, a);
+ let e = _mm_setr_epi32(4, -5, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttpd_epi32() {
+ let a = _mm256_setr_pd(4., -5.5, 6., -7.5);
+ let r = _mm256_maskz_cvttpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvttpd_epi32(0b00001111, a);
+ let e = _mm_setr_epi32(4, -5, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvttpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvttpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvttpd_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvttpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let r = _mm_maskz_cvttpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvttpd_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvttpd_epu32() {
let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -3878,14 +5236,72 @@ mod tests {
assert_eq_m256i(r, e);
}
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_maskz_cvttpd_epu32() {
- let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
- let r = _mm512_maskz_cvttpd_epu32(0, a);
- assert_eq_m256i(r, _mm256_setzero_si256());
- let r = _mm512_maskz_cvttpd_epu32(0b00001111, a);
- let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
- assert_eq_m256i(r, e);
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvttpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvttpd_epu32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvttpd_epu32(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
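+ // cvttpd_epu32 truncates toward zero; negative inputs are out of the unsigned
+ // range, which is why the 512-bit case above expects all-ones (-1) lanes.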
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvttpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_cvttpd_epu32(a);
+ let e = _mm_set_epi32(4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvttpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvttpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvttpd_epu32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_maskz_cvttpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvttpd_epu32(0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvttpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_cvttpd_epu32(a);
+ let e = _mm_set_epi32(0, 0, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvttpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvttpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvttpd_epu32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvttpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_maskz_cvttpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvttpd_epu32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 7);
+ assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
@@ -10327,7 +11743,7 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() {
- let a = _mm256_set1_epi32(9);
+ let a = _mm256_set1_epi64x(9);
let mut r = _mm_set1_epi16(0);
_mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
@@ -10336,13 +11752,76 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi64_storeu_epi16() {
- let a = _mm_set1_epi32(9);
+ let a = _mm_set1_epi64x(9);
let mut r = _mm_set1_epi16(0);
_mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9);
assert_eq_m128i(r, e);
}
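+ // The `cvt*_storeu_*` variants write the narrowed lanes directly to memory;
+ // only the lanes selected by the mask are stored, the rest of `r` keeps its
+ // previous value.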
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi16(
+ 0, 0, 0, 0,
+ u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi64_storeu_epi8() {
let a = _mm512_set1_epi64(9);
@@ -10354,7 +11833,7 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi64_storeu_epi8() {
- let a = _mm256_set1_epi32(9);
+ let a = _mm256_set1_epi64x(9);
let mut r = _mm_set1_epi8(0);
_mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
@@ -10363,13 +11842,97 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi64_storeu_epi8() {
- let a = _mm_set1_epi32(9);
+ let a = _mm_set1_epi64x(9);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9);
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_storeu_epi8() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_storeu_epi8() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_storeu_epi8() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_storeu_epi8() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_storeu_epi8() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_storeu_epi8() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() {
let a = _mm512_set1_epi64(9);
@@ -10381,7 +11944,7 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi64_storeu_epi32() {
- let a = _mm256_set1_epi32(9);
+ let a = _mm256_set1_epi64x(9);
let mut r = _mm_set1_epi32(0);
_mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi32(9, 9, 9, 9);
@@ -10390,13 +11953,67 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi64_storeu_epi32() {
- let a = _mm_set1_epi32(9);
+ let a = _mm_set1_epi64x(9);
let mut r = _mm_set1_epi16(0);
_mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi32(0, 0, 9, 9);
assert_eq_m128i(r, e);
}
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm256_undefined_si256();
+ _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm256_set1_epi32(i32::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi32(0);
+ _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a);
+ let e = _mm_set1_epi32(i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm256_undefined_si256();
+ _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm256_set1_epi32(u32::MAX as i32);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi32(0);
+ _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a);
+ let e = _mm_set1_epi32(u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_epi64() {
let a = _mm512_set1_epi64(9);
@@ -10721,4 +12338,254 @@ mod tests {
let e = _mm_set1_epi64x(11);
assert_eq_m128i(r, e);
}
+
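+ // Scalar conversions: plain `cvt*` use the current (default round-to-nearest)
+ // rounding mode, `cvt_round*` take an explicit rounding/SAE immediate, and
+ // `cvtt*` always truncate toward zero. Out-of-range unsigned conversions
+ // return all ones (u64::MAX).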
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtsd_i64(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtss_i64(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundi64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsi64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundsi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvti64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvti64_ss(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvti64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvti64_sd(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsd_si64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvt_roundsd_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvt_roundsd_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsd_u64() {
+ let a = _mm_set_pd(1., f64::MAX);
+ let r = _mm_cvt_roundsd_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtsd_u64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtsd_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvt_roundss_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundss_si64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvt_roundss_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvt_roundss_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtss_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvttsd_i64(a);
+ let e: i64 = -1; // truncation of -1.5 rounds toward zero
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtt_roundsd_i64(a, _MM_FROUND_CUR_DIRECTION);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundsd_si64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtt_roundsd_si64(a, _MM_FROUND_CUR_DIRECTION);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundsd_u64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtt_roundsd_u64(a, _MM_FROUND_CUR_DIRECTION);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttsd_u64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvttsd_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvttss_i64(a);
+ let e: i64 = -1; // truncation of -1.5 rounds toward zero
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtt_roundss_i64(a, _MM_FROUND_CUR_DIRECTION);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundss_si64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtt_roundss_si64(a, _MM_FROUND_CUR_DIRECTION);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtt_roundss_u64(a, _MM_FROUND_CUR_DIRECTION);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvttss_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtu64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvtu64_ss(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtu64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvtu64_sd(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundu64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvt_roundu64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundu64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvt_roundu64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundi64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsi64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundsi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
}
diff --git a/crates/core_arch/src/x86_64/macros.rs b/crates/core_arch/src/x86_64/macros.rs
new file mode 100644
index 0000000000..e3682d40fe
--- /dev/null
+++ b/crates/core_arch/src/x86_64/macros.rs
@@ -0,0 +1,32 @@
+//! Utility macros.
+
+// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11.
+// This macro enforces that.
+#[allow(unused)]
+macro_rules! constify_imm4_round {
+ ($imm8:expr, $expand:ident) => {
+ #[allow(overflowing_literals)]
+ match ($imm8) & 0b1111 {
+ 4 => $expand!(4),
+ 8 => $expand!(8),
+ 9 => $expand!(9),
+ 10 => $expand!(10),
+ 11 => $expand!(11),
+ _ => panic!("Invalid round value"),
+ }
+ };
+}
+
+// For sae instructions, the only valid values for sae are 4 and 8.
+// This macro enforces that.
+#[allow(unused)]
+macro_rules! constify_imm4_sae {
+ ($imm8:expr, $expand:ident) => {
+ #[allow(overflowing_literals)]
+ match ($imm8) & 0b1111 {
+ 4 => $expand!(4),
+ 8 => $expand!(8),
+ _ => panic!("Invalid sae value"),
+ }
+ };
+}
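+
+// Illustrative usage only (not part of this file's API): a caller that accepts a
+// `rounding: i32` argument can expand a local helper macro once per accepted
+// immediate, for example
+//
+//     macro_rules! call {
+//         ($imm4:expr) => {
+//             rounding_helper(a, $imm4) // `rounding_helper` is a hypothetical stand-in
+//         };
+//     }
+//     constify_imm4_round!(rounding, call);
+//
+// so only the five documented rounding values ever reach code generation.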
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index c9f3bd637c..461874ece0 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -1,5 +1,8 @@
//! `x86_64` intrinsics
+#[macro_use]
+mod macros;
+
mod fxsr;
pub use self::fxsr::*;