diff --git a/crates/core_arch/avx512bw.md b/crates/core_arch/avx512bw.md index 7484e8792b..367cb5de2a 100644 --- a/crates/core_arch/avx512bw.md +++ b/crates/core_arch/avx512bw.md @@ -1,34 +1,34 @@ ["AVX512BW"]

* [x] [`_mm512_loadu_epi16`] - * [_] [`_mm512_mask_loadu_epi16`] - * [_] [`_mm512_maskz_loadu_epi16`] + * [_] [`_mm512_mask_loadu_epi16`] //need i1 + * [_] [`_mm512_maskz_loadu_epi16`] //need i1 * [x] [`_mm_loadu_epi16`] - * [_] [`_mm_mask_loadu_epi16`] - * [_] [`_mm_maskz_loadu_epi16`] + * [_] [`_mm_mask_loadu_epi16`] //need i1 + * [_] [`_mm_maskz_loadu_epi16`] //need i1 * [x] [`_mm256_loadu_epi16`] - * [_] [`_mm256_mask_loadu_epi16`] - * [_] [`_mm256_maskz_loadu_epi16`] + * [_] [`_mm256_mask_loadu_epi16`] //need i1 + * [_] [`_mm256_maskz_loadu_epi16`] //need i1 * [x] [`_mm512_loadu_epi8`] - * [_] [`_mm512_mask_loadu_epi8`] - * [_] [`_mm512_maskz_loadu_epi8`] + * [_] [`_mm512_mask_loadu_epi8`] //need i1 + * [_] [`_mm512_maskz_loadu_epi8`] //need i1 * [x] [`_mm_loadu_epi8`] - * [_] [`_mm_mask_loadu_epi8`] - * [_] [`_mm_maskz_loadu_epi8`] + * [_] [`_mm_mask_loadu_epi8`] //need i1 + * [_] [`_mm_maskz_loadu_epi8`] //need i1 * [x] [`_mm256_loadu_epi8`] - * [_] [`_mm256_mask_loadu_epi8`] - * [_] [`_mm256_maskz_loadu_epi8`] + * [_] [`_mm256_mask_loadu_epi8`] //need i1 + * [_] [`_mm256_maskz_loadu_epi8`] //need i1 * [_] [`_mm512_mask_storeu_epi16`] * [x] [`_mm512_storeu_epi16`] - * [_] [`_mm_mask_storeu_epi16`] + * [_] [`_mm_mask_storeu_epi16`] //need i1 * [x] [`_mm_storeu_epi16`] - * [_] [`_mm256_mask_storeu_epi16`] + * [_] [`_mm256_mask_storeu_epi16`] //need i1 * [x] [`_mm256_storeu_epi16`] - * [_] [`_mm512_mask_storeu_epi8`] + * [_] [`_mm512_mask_storeu_epi8`] //need i1 * [x] [`_mm512_storeu_epi8`] - * [_] [`_mm_mask_storeu_epi8`] + * [_] [`_mm_mask_storeu_epi8`] //need i1 * [x] [`_mm_storeu_epi8`] - * [_] [`_mm256_mask_storeu_epi8`] + * [_] [`_mm256_mask_storeu_epi8`] //need i1 * [x] [`_mm256_storeu_epi8`] * [x] [`_mm512_abs_epi16`] * [x] [`_mm512_mask_abs_epi16`] diff --git a/crates/core_arch/avx512f.md b/crates/core_arch/avx512f.md index e61f25507c..1ad80147cf 100644 --- a/crates/core_arch/avx512f.md +++ b/crates/core_arch/avx512f.md @@ -1,5 +1,5 @@

["AVX512F"]

- + * [x] [`_mm512_abs_epi32`] * [x] [`_mm512_mask_abs_epi32`] * [x] [`_mm512_maskz_abs_epi32`] @@ -2025,165 +2025,314 @@ * [x] [`_mm_maskz_cvtepi8_epi32`] * [x] [`_mm256_mask_cvtepi8_epi32`] * [x] [`_mm256_maskz_cvtepi8_epi32`] - - * [x] [`_mm512_mask_cvtsepi64_epi32`] - * [x] [`_mm512_mask_cvtsepi64_epi8`] - * [ ] [`_mm512_mask_cvtsepi64_storeu_epi16`] - * [ ] [`_mm512_mask_cvtsepi64_storeu_epi32`] - * [ ] [`_mm512_mask_cvtsepi64_storeu_epi8`] - * [x] [`_mm512_cvt_roundepi32_ps`] - * [x] [`_mm512_cvt_roundepu32_ps`] - * [x] [`_mm512_cvt_roundpd_epi32`] - * [x] [`_mm512_cvt_roundpd_epu32`] - * [x] [`_mm512_cvt_roundpd_ps`] - * [x] [`_mm512_cvt_roundph_ps`] - * [x] [`_mm512_cvt_roundps_epi32`] - * [x] [`_mm512_cvt_roundps_epu32`] - * [x] [`_mm512_cvt_roundps_pd`] - - * [x] [`_mm512_mask_cvtsepi64_epi16`] * [x] [`_mm512_cvtepi8_epi64`] + * [x] [`_mm512_mask_cvtepi8_epi64`] + * [x] [`_mm512_maskz_cvtepi8_epi64`] + * [x] [`_mm_mask_cvtepi8_epi64`] + * [x] [`_mm_maskz_cvtepi8_epi64`] + * [x] [`_mm256_mask_cvtepi8_epi64`] + * [x] [`_mm256_maskz_cvtepi8_epi64`] * [x] [`_mm512_cvtepu16_epi32`] + * [x] [`_mm512_mask_cvtepu16_epi32`] + * [x] [`_mm512_maskz_cvtepu16_epi32`] + * [x] [`_mm_mask_cvtepu16_epi32`] + * [x] [`_mm_maskz_cvtepu16_epi32`] + * [x] [`_mm256_mask_cvtepu16_epi32`] + * [x] [`_mm256_maskz_cvtepu16_epi32`] * [x] [`_mm512_cvtepu16_epi64`] + * [x] [`_mm512_mask_cvtepu16_epi64`] + * [x] [`_mm512_maskz_cvtepu16_epi64`] + * [x] [`_mm_mask_cvtepu16_epi64`] + * [x] [`_mm_maskz_cvtepu16_epi64`] + * [x] [`_mm256_mask_cvtepu16_epi64`] + * [x] [`_mm256_maskz_cvtepu16_epi64`] * [x] [`_mm512_cvtepu32_epi64`] - * [x] [`_mm512_cvtepu32_pd`] + * [x] [`_mm512_mask_cvtepu32_epi64`] + * [x] [`_mm512_maskz_cvtepu32_epi64`] + * [x] [`_mm_mask_cvtepu32_epi64`] + * [x] [`_mm_maskz_cvtepu32_epi64`] + * [x] [`_mm256_mask_cvtepu32_epi64`] + * [x] [`_mm256_maskz_cvtepu32_epi64`] * [x] [`_mm512_cvtepu32_ps`] + * [x] [`_mm512_mask_cvtepu32_ps`] + * [x] [`_mm512_maskz_cvtepu32_ps`] + * [x] [`_mm512_cvtepu32_pd`] + * [x] [`_mm512_mask_cvtepu32_pd`] + * [x] [`_mm512_maskz_cvtepu32_pd`] + * [x] [`_mm_cvtepu32_pd`] + * [x] [`_mm_mask_cvtepu32_pd`] + * [x] [`_mm_maskz_cvtepu32_pd`] + * [x] [`_mm256_cvtepu32_pd`] + * [x] [`_mm256_mask_cvtepu32_pd`] + * [x] [`_mm256_maskz_cvtepu32_pd`] * [x] [`_mm512_cvtepu32lo_pd`] + * [x] [`_mm512_mask_cvtepu32lo_pd`] * [x] [`_mm512_cvtepu8_epi32`] + * [x] [`_mm512_mask_cvtepu8_epi32`] + * [x] [`_mm512_maskz_cvtepu8_epi32`] + * [x] [`_mm_mask_cvtepu8_epi32`] + * [x] [`_mm_maskz_cvtepu8_epi32`] + * [x] [`_mm256_mask_cvtepu8_epi32`] + * [x] [`_mm256_maskz_cvtepu8_epi32`] * [x] [`_mm512_cvtepu8_epi64`] + * [x] [`_mm512_mask_cvtepu8_epi64`] + * [x] [`_mm512_maskz_cvtepu8_epi64`] + * [x] [`_mm_mask_cvtepu8_epi64`] + * [x] [`_mm_maskz_cvtepu8_epi64`] + * [x] [`_mm256_mask_cvtepu8_epi64`] + * [x] [`_mm256_maskz_cvtepu8_epi64`] * [x] [`_mm512_cvtpd_epi32`] + * [x] [`_mm512_mask_cvtpd_epi32`] + * [x] [`_mm512_maskz_cvtpd_epi32`] + * [x] [`_mm_mask_cvtpd_epi32`] + * [x] [`_mm_maskz_cvtpd_epi32`] + * [x] [`_mm256_mask_cvtpd_epi32`] + * [x] [`_mm256_maskz_cvtpd_epi32`] * [x] [`_mm512_cvtpd_epu32`] + * [x] [`_mm512_mask_cvtpd_epu32`] + * [x] [`_mm512_maskz_cvtpd_epu32`] + * [x] [`_mm_cvtpd_epu32`] + * [x] [`_mm_mask_cvtpd_epu32`] + * [x] [`_mm_maskz_cvtpd_epu32`] + * [x] [`_mm256_cvtpd_epu32`] + * [x] [`_mm256_mask_cvtpd_epu32`] + * [x] [`_mm256_maskz_cvtpd_epu32`] * [x] [`_mm512_cvtpd_ps`] + * [x] [`_mm512_mask_cvtpd_ps`] + * [x] [`_mm512_maskz_cvtpd_ps`] + * [x] [`_mm_mask_cvtpd_ps`] + * [x] 
[`_mm_maskz_cvtpd_ps`] + * [x] [`_mm256_mask_cvtpd_ps`] + * [x] [`_mm256_maskz_cvtpd_ps`] * [x] [`_mm512_cvtpd_pslo`] + * [x] [`_mm512_mask_cvtpd_pslo`] * [x] [`_mm512_cvtph_ps`] + * [x] [`_mm512_mask_cvtph_ps`] + * [x] [`_mm512_maskz_cvtph_ps`] + * [x] [`_mm_mask_cvtph_ps`] + * [x] [`_mm_maskz_cvtph_ps`] + * [x] [`_mm256_mask_cvtph_ps`] + * [x] [`_mm256_maskz_cvtph_ps`] * [x] [`_mm512_cvtps_epi32`] + * [x] [`_mm512_mask_cvtps_epi32`] + * [x] [`_mm512_maskz_cvtps_epi32`] + * [x] [`_mm_mask_cvtps_epi32`] + * [x] [`_mm_maskz_cvtps_epi32`] + * [x] [`_mm256_mask_cvtps_epi32`] + * [x] [`_mm256_maskz_cvtps_epi32`] * [x] [`_mm512_cvtps_epu32`] + * [x] [`_mm512_mask_cvtps_epu32`] + * [x] [`_mm512_maskz_cvtps_epu32`] + * [x] [`_mm_cvtps_epu32`] + * [x] [`_mm_mask_cvtps_epu32`] + * [x] [`_mm_maskz_cvtps_epu32`] + * [x] [`_mm256_cvtps_epu32`] + * [x] [`_mm256_mask_cvtps_epu32`] + * [x] [`_mm256_maskz_cvtps_epu32`] * [x] [`_mm512_cvtps_pd`] + * [x] [`_mm512_mask_cvtps_pd`] + * [x] [`_mm512_maskz_cvtps_pd`] * [x] [`_mm512_cvtps_ph`] + * [x] [`_mm512_mask_cvtps_ph`] + * [x] [`_mm512_maskz_cvtps_ph`] + * [x] [`_mm_mask_cvtps_ph`] + * [x] [`_mm_maskz_cvtps_ph`] + * [x] [`_mm256_mask_cvtps_ph`] + * [x] [`_mm256_maskz_cvtps_ph`] * [x] [`_mm512_cvtpslo_pd`] + * [x] [`_mm512_mask_cvtpslo_pd`] * [x] [`_mm512_cvtsepi32_epi16`] + * [x] [`_mm512_mask_cvtsepi32_epi16`] + * [x] [`_mm512_maskz_cvtsepi32_epi16`] + * [x] [`_mm_cvtsepi32_epi16`] + * [x] [`_mm_mask_cvtsepi32_epi16`] + * [x] [`_mm_maskz_cvtsepi32_epi16`] + * [x] [`_mm256_cvtsepi32_epi16`] + * [x] [`_mm256_mask_cvtsepi32_epi16`] + * [x] [`_mm256_maskz_cvtsepi32_epi16`] * [x] [`_mm512_cvtsepi32_epi8`] + * [x] [`_mm512_mask_cvtsepi32_epi8`] + * [x] [`_mm512_maskz_cvtsepi32_epi8`] + * [x] [`_mm_cvtsepi32_epi8`] + * [x] [`_mm_mask_cvtsepi32_epi8`] + * [x] [`_mm_maskz_cvtsepi32_epi8`] + * [x] [`_mm256_cvtsepi32_epi8`] + * [x] [`_mm256_mask_cvtsepi32_epi8`] + * [x] [`_mm256_maskz_cvtsepi32_epi8`] + * [x] [`_mm512_mask_cvtsepi32_storeu_epi16`] + * [x] [`_mm_mask_cvtsepi32_storeu_epi16`] + * [x] [`_mm256_mask_cvtsepi32_storeu_epi16`] + * [x] [`_mm512_mask_cvtsepi32_storeu_epi8`] + * [x] [`_mm_mask_cvtsepi32_storeu_epi8`] + * [x] [`_mm256_mask_cvtsepi32_storeu_epi8`] * [x] [`_mm512_cvtsepi64_epi16`] + * [x] [`_mm512_mask_cvtsepi64_epi16`] + * [x] [`_mm512_maskz_cvtsepi64_epi16`] + * [x] [`_mm_cvtsepi64_epi16`] + * [x] [`_mm_mask_cvtsepi64_epi16`] + * [x] [`_mm_maskz_cvtsepi64_epi16`] + * [x] [`_mm256_cvtsepi64_epi16`] + * [x] [`_mm256_mask_cvtsepi64_epi16`] + * [x] [`_mm256_maskz_cvtsepi64_epi16`] * [x] [`_mm512_cvtsepi64_epi32`] + * [x] [`_mm512_mask_cvtsepi64_epi32`] + * [x] [`_mm512_maskz_cvtsepi64_epi32`] + * [x] [`_mm_cvtsepi64_epi32`] + * [x] [`_mm_mask_cvtsepi64_epi32`] + * [x] [`_mm_maskz_cvtsepi64_epi32`] + * [x] [`_mm256_cvtsepi64_epi32`] + * [x] [`_mm256_mask_cvtsepi64_epi32`] + * [x] [`_mm256_maskz_cvtsepi64_epi32`] * [x] [`_mm512_cvtsepi64_epi8`] - * [x] [`_mm512_cvtt_roundpd_epi32`] - * [x] [`_mm512_cvtt_roundpd_epu32`] - * [x] [`_mm512_cvtt_roundps_epi32`] - * [x] [`_mm512_cvtt_roundps_epu32`] - * [x] [`_mm512_cvttpd_epi32`] - * [x] [`_mm512_cvttpd_epu32`] - * [x] [`_mm512_cvttps_epi32`] - * [x] [`_mm512_cvttps_epu32`] + * [x] [`_mm512_mask_cvtsepi64_epi8`] + * [x] [`_mm512_maskz_cvtsepi64_epi8`] + * [x] [`_mm_cvtsepi64_epi8`] + * [x] [`_mm_mask_cvtsepi64_epi8`] + * [x] [`_mm_maskz_cvtsepi64_epi8`] + * [x] [`_mm256_cvtsepi64_epi8`] + * [x] [`_mm256_mask_cvtsepi64_epi8`] + * [x] [`_mm256_maskz_cvtsepi64_epi8`] + * [x] 
[`_mm512_mask_cvtsepi64_storeu_epi16`] + * [x] [`_mm_mask_cvtsepi64_storeu_epi16`] + * [x] [`_mm256_mask_cvtsepi64_storeu_epi16`] + * [x] [`_mm512_mask_cvtsepi64_storeu_epi32`] + * [x] [`_mm_mask_cvtsepi64_storeu_epi32`] + * [x] [`_mm256_mask_cvtsepi64_storeu_epi32`] + * [x] [`_mm512_mask_cvtsepi64_storeu_epi8`] + * [x] [`_mm_mask_cvtsepi64_storeu_epi8`] + * [x] [`_mm256_mask_cvtsepi64_storeu_epi8`] * [x] [`_mm512_cvtusepi32_epi16`] + * [x] [`_mm512_mask_cvtusepi32_epi16`] + * [x] [`_mm512_maskz_cvtusepi32_epi16`] + * [x] [`_mm_cvtusepi32_epi16`] + * [x] [`_mm_mask_cvtusepi32_epi16`] + * [x] [`_mm_maskz_cvtusepi32_epi16`] + * [x] [`_mm256_cvtusepi32_epi16`] + * [x] [`_mm256_mask_cvtusepi32_epi16`] + * [x] [`_mm256_maskz_cvtusepi32_epi16`] * [x] [`_mm512_cvtusepi32_epi8`] + * [x] [`_mm512_mask_cvtusepi32_epi8`] + * [x] [`_mm512_maskz_cvtusepi32_epi8`] + * [x] [`_mm_cvtusepi32_epi8`] + * [x] [`_mm_mask_cvtusepi32_epi8`] + * [x] [`_mm_maskz_cvtusepi32_epi8`] + * [x] [`_mm256_cvtusepi32_epi8`] + * [x] [`_mm256_mask_cvtusepi32_epi8`] + * [x] [`_mm256_maskz_cvtusepi32_epi8`] + * [x] [`_mm512_mask_cvtusepi32_storeu_epi16`] + * [x] [`_mm_mask_cvtusepi32_storeu_epi16`] + * [x] [`_mm256_mask_cvtusepi32_storeu_epi16`] + * [x] [`_mm512_mask_cvtusepi32_storeu_epi8`] + * [x] [`_mm_mask_cvtusepi32_storeu_epi8`] + * [x] [`_mm256_mask_cvtusepi32_storeu_epi8`] * [x] [`_mm512_cvtusepi64_epi16`] + * [x] [`_mm512_mask_cvtusepi64_epi16`] + * [x] [`_mm512_maskz_cvtusepi64_epi16`] + * [x] [`_mm_cvtusepi64_epi16`] + * [x] [`_mm_mask_cvtusepi64_epi16`] + * [x] [`_mm_maskz_cvtusepi64_epi16`] + * [x] [`_mm256_cvtusepi64_epi16`] + * [x] [`_mm256_mask_cvtusepi64_epi16`] + * [x] [`_mm256_maskz_cvtusepi64_epi16`] * [x] [`_mm512_cvtusepi64_epi32`] + * [x] [`_mm512_mask_cvtusepi64_epi32`] + * [x] [`_mm512_maskz_cvtusepi64_epi32`] + * [x] [`_mm_cvtusepi64_epi32`] + * [x] [`_mm_mask_cvtusepi64_epi32`] + * [x] [`_mm_maskz_cvtusepi64_epi32`] + * [x] [`_mm256_cvtusepi64_epi32`] + * [x] [`_mm256_mask_cvtusepi64_epi32`] + * [x] [`_mm256_maskz_cvtusepi64_epi32`] * [x] [`_mm512_cvtusepi64_epi8`] - * [x] [`_mm512_int2mask`] - * [x] [`_mm512_kand`] - * [x] [`_mm512_kandn`] - * [x] [`_mm512_kmov`] - * [x] [`_mm512_knot`] - * [x] [`_mm512_kor`] - * [x] [`_mm512_kortestc`] - * [ ] [`_mm512_kortestz`] - * [x] [`_mm512_kunpackb`] - * [x] [`_mm512_kxnor`] - * [x] [`_mm512_kxor`] - * [x] [`_mm512_mask2int`] - * [x] [`_mm512_mask_cvt_roundepi32_ps`] - * [x] [`_mm512_mask_cvt_roundepu32_ps`] - * [x] [`_mm512_mask_cvt_roundpd_epi32`] - * [x] [`_mm512_mask_cvt_roundpd_epu32`] - * [x] [`_mm512_mask_cvt_roundpd_ps`] - * [x] [`_mm512_mask_cvt_roundph_ps`] - * [x] [`_mm512_mask_cvt_roundps_epi32`] - * [x] [`_mm512_mask_cvt_roundps_epu32`] - * [x] [`_mm512_mask_cvt_roundps_pd`] - * [x] [`_mm512_mask_cvtepi8_epi64`] - * [x] [`_mm512_mask_cvtepu16_epi32`] - * [x] [`_mm512_mask_cvtepu16_epi64`] - * [x] [`_mm512_mask_cvtepu32_epi64`] - * [x] [`_mm512_mask_cvtepu32_pd`] - * [x] [`_mm512_mask_cvtepu32_ps`] - * [x] [`_mm512_mask_cvtepu32lo_pd`] - * [x] [`_mm512_mask_cvtepu8_epi32`] - * [x] [`_mm512_mask_cvtepu8_epi64`] - * [x] [`_mm512_mask_cvtpd_epi32`] - * [x] [`_mm512_mask_cvtpd_epu32`] - * [x] [`_mm512_mask_cvtpd_ps`] - * [x] [`_mm512_mask_cvtpd_pslo`] - * [x] [`_mm512_mask_cvtph_ps`] - * [x] [`_mm512_mask_cvtps_epi32`] - * [x] [`_mm512_mask_cvtps_epu32`] - * [x] [`_mm512_mask_cvtps_pd`] - * [x] [`_mm512_mask_cvtps_ph`] - * [x] [`_mm512_mask_cvtpslo_pd`] - * [x] [`_mm512_mask_cvtsepi32_epi16`] - * [x] [`_mm512_mask_cvtsepi32_epi8`] - * [ ] 
[`_mm512_mask_cvtsepi32_storeu_epi16`] - * [ ] [`_mm512_mask_cvtsepi32_storeu_epi8`] - * [x] [`_mm512_mask_cvtt_roundpd_epi32`] - * [x] [`_mm512_mask_cvtt_roundpd_epu32`] - * [x] [`_mm512_mask_cvtt_roundps_epi32`] - * [x] [`_mm512_mask_cvtt_roundps_epu32`] + * [x] [`_mm512_mask_cvtusepi64_epi8`] + * [x] [`_mm512_maskz_cvtusepi64_epi8`] + * [x] [`_mm_cvtusepi64_epi8`] + * [x] [`_mm_mask_cvtusepi64_epi8`] + * [x] [`_mm_maskz_cvtusepi64_epi8`] + * [x] [`_mm256_cvtusepi64_epi8`] + * [x] [`_mm256_mask_cvtusepi64_epi8`] + * [x] [`_mm256_maskz_cvtusepi64_epi8`] + * [x] [`_mm512_mask_cvtusepi64_storeu_epi16`] + * [x] [`_mm_mask_cvtusepi64_storeu_epi16`] + * [x] [`_mm256_mask_cvtusepi64_storeu_epi16`] + * [x] [`_mm512_mask_cvtusepi64_storeu_epi32`] + * [x] [`_mm_mask_cvtusepi64_storeu_epi32`] + * [x] [`_mm256_mask_cvtusepi64_storeu_epi32`] + * [x] [`_mm512_mask_cvtusepi64_storeu_epi8`] + * [x] [`_mm_mask_cvtusepi64_storeu_epi8`] + * [x] [`_mm256_mask_cvtusepi64_storeu_epi8`] + * [x] [`_mm512_cvtsi512_si32`] + * [x] [`_mm512_cvttpd_epi32`] * [x] [`_mm512_mask_cvttpd_epi32`] + * [x] [`_mm512_maskz_cvttpd_epi32`] + * [x] [`_mm_mask_cvttpd_epi32`] + * [x] [`_mm_maskz_cvttpd_epi32`] + * [x] [`_mm256_mask_cvttpd_epi32`] + * [x] [`_mm256_maskz_cvttpd_epi32`] + * [x] [`_mm512_cvttpd_epu32`] * [x] [`_mm512_mask_cvttpd_epu32`] + * [x] [`_mm512_maskz_cvttpd_epu32`] + * [x] [`_mm_cvttpd_epu32`] + * [x] [`_mm_mask_cvttpd_epu32`] + * [x] [`_mm_maskz_cvttpd_epu32`] + * [x] [`_mm256_cvttpd_epu32`] + * [x] [`_mm256_mask_cvttpd_epu32`] + * [x] [`_mm256_maskz_cvttpd_epu32`] + * [x] [`_mm512_cvttps_epi32`] * [x] [`_mm512_mask_cvttps_epi32`] + * [x] [`_mm512_maskz_cvttps_epi32`] + * [x] [`_mm_mask_cvttps_epi32`] + * [x] [`_mm_maskz_cvttps_epi32`] + * [x] [`_mm256_mask_cvttps_epi32`] + * [x] [`_mm256_maskz_cvttps_epi32`] + * [x] [`_mm512_cvttps_epu32`] * [x] [`_mm512_mask_cvttps_epu32`] - * [x] [`_mm512_mask_cvtusepi32_epi16`] - * [x] [`_mm512_mask_cvtusepi32_epi8`] - * [ ] [`_mm512_mask_cvtusepi32_storeu_epi16`] - * [ ] [`_mm512_mask_cvtusepi32_storeu_epi8`] - * [x] [`_mm512_mask_cvtusepi64_epi16`] - * [x] [`_mm512_mask_cvtusepi64_epi32`] - * [x] [`_mm512_mask_cvtusepi64_epi8`] - * [ ] [`_mm512_mask_cvtusepi64_storeu_epi16`] - * [ ] [`_mm512_mask_cvtusepi64_storeu_epi32`] - * [ ] [`_mm512_mask_cvtusepi64_storeu_epi8`] + * [x] [`_mm512_maskz_cvttps_epu32`] + * [x] [`_mm_cvttps_epu32`] + * [x] [`_mm_mask_cvttps_epu32`] + * [x] [`_mm_maskz_cvttps_epu32`] + * [x] [`_mm256_cvttps_epu32`] + * [x] [`_mm256_mask_cvttps_epu32`] + * [x] [`_mm256_maskz_cvttps_epu32`] + * [x] [`_mm512_cvt_roundepi32_ps`] + * [x] [`_mm512_mask_cvt_roundepi32_ps`] * [x] [`_mm512_maskz_cvt_roundepi32_ps`] + * [x] [`_mm512_cvt_roundepu32_ps`] + * [x] [`_mm512_mask_cvt_roundepu32_ps`] * [x] [`_mm512_maskz_cvt_roundepu32_ps`] + * [x] [`_mm512_cvt_roundpd_epi32`] + * [x] [`_mm512_mask_cvt_roundpd_epi32`] * [x] [`_mm512_maskz_cvt_roundpd_epi32`] + * [x] [`_mm512_cvt_roundpd_epu32`] + * [x] [`_mm512_mask_cvt_roundpd_epu32`] * [x] [`_mm512_maskz_cvt_roundpd_epu32`] + * [x] [`_mm512_cvt_roundpd_ps`] + * [x] [`_mm512_mask_cvt_roundpd_ps`] * [x] [`_mm512_maskz_cvt_roundpd_ps`] + * [x] [`_mm512_cvt_roundph_ps`] + * [x] [`_mm512_mask_cvt_roundph_ps`] * [x] [`_mm512_maskz_cvt_roundph_ps`] + * [x] [`_mm512_cvt_roundps_epi32`] + * [x] [`_mm512_mask_cvt_roundps_epi32`] * [x] [`_mm512_maskz_cvt_roundps_epi32`] + * [x] [`_mm512_cvt_roundps_epu32`] + * [x] [`_mm512_mask_cvt_roundps_epu32`] * [x] [`_mm512_maskz_cvt_roundps_epu32`] + * [x] [`_mm512_cvt_roundps_pd`] + * 
[x] [`_mm512_mask_cvt_roundps_pd`] * [x] [`_mm512_maskz_cvt_roundps_pd`] - * [x] [`_mm512_maskz_cvtepi8_epi64`] - * [x] [`_mm512_maskz_cvtepu16_epi32`] - * [x] [`_mm512_maskz_cvtepu16_epi64`] - * [x] [`_mm512_maskz_cvtepu32_epi64`] - * [x] [`_mm512_maskz_cvtepu32_pd`] - * [x] [`_mm512_maskz_cvtepu32_ps`] - * [x] [`_mm512_maskz_cvtepu8_epi32`] - * [x] [`_mm512_maskz_cvtepu8_epi64`] - * [x] [`_mm512_maskz_cvtpd_epi32`] - * [x] [`_mm512_maskz_cvtpd_epu32`] - * [x] [`_mm512_maskz_cvtpd_ps`] - * [x] [`_mm512_maskz_cvtph_ps`] - * [x] [`_mm512_maskz_cvtps_epi32`] - * [x] [`_mm512_maskz_cvtps_epu32`] - * [x] [`_mm512_maskz_cvtps_pd`] - * [x] [`_mm512_maskz_cvtps_ph`] - * [x] [`_mm512_maskz_cvtsepi32_epi16`] - * [x] [`_mm512_maskz_cvtsepi32_epi8`] - * [x] [`_mm512_maskz_cvtsepi64_epi16`] - * [x] [`_mm512_maskz_cvtsepi64_epi32`] - * [x] [`_mm512_maskz_cvtsepi64_epi8`] + * [x] [`_mm512_cvtt_roundpd_epi32`] + * [x] [`_mm512_mask_cvtt_roundpd_epi32`] * [x] [`_mm512_maskz_cvtt_roundpd_epi32`] + * [x] [`_mm512_cvtt_roundpd_epu32`] + * [x] [`_mm512_mask_cvtt_roundpd_epu32`] * [x] [`_mm512_maskz_cvtt_roundpd_epu32`] + * [x] [`_mm512_cvtt_roundps_epi32`] + * [x] [`_mm512_mask_cvtt_roundps_epi32`] * [x] [`_mm512_maskz_cvtt_roundps_epi32`] + * [x] [`_mm512_cvtt_roundps_epu32`] + * [x] [`_mm512_mask_cvtt_roundps_epu32`] * [x] [`_mm512_maskz_cvtt_roundps_epu32`] - * [x] [`_mm512_maskz_cvttpd_epi32`] - * [x] [`_mm512_maskz_cvttpd_epu32`] - * [x] [`_mm512_maskz_cvttps_epi32`] - * [x] [`_mm512_maskz_cvttps_epu32`] - * [x] [`_mm512_maskz_cvtusepi32_epi16`] - * [x] [`_mm512_maskz_cvtusepi32_epi8`] - * [x] [`_mm512_maskz_cvtusepi64_epi16`] - * [x] [`_mm512_maskz_cvtusepi64_epi32`] - * [x] [`_mm512_maskz_cvtusepi64_epi8`] * [x] [`_mm_add_round_sd`] * [x] [`_mm_add_round_ss`] * [x] [`_mm_cmp_round_sd_mask`] @@ -2193,60 +2342,60 @@ * [x] [`_mm_comi_round_sd`] * [x] [`_mm_comi_round_ss`] * [x] [`_mm_cvt_roundi32_ss`] - * [ ] [`_mm_cvt_roundi64_sd`] - * [ ] [`_mm_cvt_roundi64_ss`] + * [x] [`_mm_cvt_roundi64_sd`] + * [x] [`_mm_cvt_roundi64_ss`] * [x] [`_mm_cvt_roundsd_i32`] - * [ ] [`_mm_cvt_roundsd_i64`] + * [x] [`_mm_cvt_roundsd_i64`] * [x] [`_mm_cvt_roundsd_si32`] - * [ ] [`_mm_cvt_roundsd_si64`] + * [x] [`_mm_cvt_roundsd_si64`] * [x] [`_mm_cvt_roundsd_ss`] * [x] [`_mm_cvt_roundsd_u32`] - * [ ] [`_mm_cvt_roundsd_u64`] + * [x] [`_mm_cvt_roundsd_u64`] * [x] [`_mm_cvt_roundsi32_ss`] - * [ ] [`_mm_cvt_roundsi64_sd`] - * [ ] [`_mm_cvt_roundsi64_ss`] + * [x] [`_mm_cvt_roundsi64_sd`] + * [x] [`_mm_cvt_roundsi64_ss`] * [x] [`_mm_cvt_roundss_i32`] - * [ ] [`_mm_cvt_roundss_i64`] + * [x] [`_mm_cvt_roundss_i64`] * [x] [`_mm_cvt_roundss_sd`] * [x] [`_mm_cvt_roundss_si32`] - * [ ] [`_mm_cvt_roundss_si64`] + * [x] [`_mm_cvt_roundss_si64`] * [x] [`_mm_cvt_roundss_u32`] - * [ ] [`_mm_cvt_roundss_u64`] + * [x] [`_mm_cvt_roundss_u64`] * [x] [`_mm_cvt_roundu32_ss`] - * [ ] [`_mm_cvt_roundu64_sd`] - * [ ] [`_mm_cvt_roundu64_ss`] + * [x] [`_mm_cvt_roundu64_sd`] + * [x] [`_mm_cvt_roundu64_ss`] * [x] [`_mm_cvti32_sd`] * [x] [`_mm_cvti32_ss`] - * [ ] [`_mm_cvti64_sd`] - * [ ] [`_mm_cvti64_ss`] + * [x] [`_mm_cvti64_sd`] + * [x] [`_mm_cvti64_ss`] * [x] [`_mm_cvtsd_i32`] - * [ ] [`_mm_cvtsd_i64`] + * [x] [`_mm_cvtsd_i64`] * [x] [`_mm_cvtsd_u32`] - * [ ] [`_mm_cvtsd_u64`] + * [x] [`_mm_cvtsd_u64`] * [x] [`_mm_cvtss_i32`] - * [ ] [`_mm_cvtss_i64`] + * [x] [`_mm_cvtss_i64`] * [x] [`_mm_cvtss_u32`] - * [ ] [`_mm_cvtss_u64`] + * [x] [`_mm_cvtss_u64`] * [x] [`_mm_cvtt_roundsd_i32`] * [x] [`_mm_cvtt_roundsd_i64`] * [x] [`_mm_cvtt_roundsd_si32`] - * [ ] 
[`_mm_cvtt_roundsd_si64`] + * [x] [`_mm_cvtt_roundsd_si64`] * [x] [`_mm_cvtt_roundsd_u32`] - * [ ] [`_mm_cvtt_roundsd_u64`] + * [x] [`_mm_cvtt_roundsd_u64`] * [x] [`_mm_cvtt_roundss_i32`] - * [ ] [`_mm_cvtt_roundss_i64`] + * [x] [`_mm_cvtt_roundss_i64`] * [x] [`_mm_cvtt_roundss_si32`] - * [ ] [`_mm_cvtt_roundss_si64`] + * [x] [`_mm_cvtt_roundss_si64`] * [x] [`_mm_cvtt_roundss_u32`] - * [ ] [`_mm_cvtt_roundss_u64`] + * [x] [`_mm_cvtt_roundss_u64`] * [x] [`_mm_cvttsd_i32`] - * [ ] [`_mm_cvttsd_i64`] + * [x] [`_mm_cvttsd_i64`] * [x] [`_mm_cvttsd_u32`] - * [ ] [`_mm_cvttsd_u64`] + * [x] [`_mm_cvttsd_u64`] * [x] [`_mm_cvttss_i32`] - * [ ] [`_mm_cvttss_i64`] + * [x] [`_mm_cvttss_i64`] * [x] [`_mm_cvttss_u32`] - * [ ] [`_mm_cvttss_u64`] + * [x] [`_mm_cvttss_u64`] * [x] [`_mm_cvtu32_sd`] * [x] [`_mm_cvtu32_ss`] * [x] [`_mm_cvtu64_sd`] @@ -2333,8 +2482,8 @@ * [x] [`_mm_mask_getmant_round_ss`] * [x] [`_mm_mask_getmant_sd`] * [x] [`_mm_mask_getmant_ss`] - * [ ] [`_mm_mask_load_sd`] - * [ ] [`_mm_mask_load_ss`] + * [ ] [`_mm_mask_load_sd`] //need i1 + * [ ] [`_mm_mask_load_ss`] //need i1 * [x] [`_mm_mask_max_round_sd`] * [x] [`_mm_mask_max_round_ss`] * [x] [`_mm_mask_max_sd`] @@ -2365,8 +2514,8 @@ * [x] [`_mm_mask_sqrt_round_ss`] * [x] [`_mm_mask_sqrt_sd`] * [x] [`_mm_mask_sqrt_ss`] - * [ ] [`_mm_mask_store_sd`] - * [ ] [`_mm_mask_store_ss`] + * [ ] [`_mm_mask_store_sd`] //need i1 + * [ ] [`_mm_mask_store_ss`] //need i1 * [x] [`_mm_mask_sub_round_sd`] * [x] [`_mm_mask_sub_round_ss`] * [x] [`_mm_mask_sub_sd`] @@ -2411,8 +2560,8 @@ * [x] [`_mm_maskz_getmant_round_ss`] * [x] [`_mm_maskz_getmant_sd`] * [x] [`_mm_maskz_getmant_ss`] - * [ ] [`_mm_maskz_load_sd`] - * [ ] [`_mm_maskz_load_ss`] + * [ ] [`_mm_maskz_load_sd`] //need i1 + * [ ] [`_mm_maskz_load_ss`] //need i1 * [x] [`_mm_maskz_max_round_sd`] * [x] [`_mm_maskz_max_round_ss`] * [x] [`_mm_maskz_max_sd`] @@ -2469,4 +2618,16 @@ * [x] [`_mm_sqrt_round_ss`] * [x] [`_mm_sub_round_sd`] * [x] [`_mm_sub_round_ss`] + * [x] [`_mm512_int2mask`] + * [x] [`_mm512_kand`] + * [x] [`_mm512_kandn`] + * [x] [`_mm512_kmov`] + * [x] [`_mm512_knot`] + * [x] [`_mm512_kor`] + * [x] [`_mm512_kortestc`] + * [ ] [`_mm512_kortestz`] //not sure + * [x] [`_mm512_kunpackb`] + * [x] [`_mm512_kxnor`] + * [x] [`_mm512_kxor`] + * [x] [`_mm512_mask2int`]
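Every `_mask_`/`_maskz_` pair ticked off above follows the same convention: the `_mask_` form merges converted lanes into `src` wherever the corresponding mask bit is clear, while the `_maskz_` form zeroes those lanes instead. A small usage sketch, not part of the diff (the function name, values, and mask below are ours), using two intrinsics from this list:

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn mask_vs_maskz_demo() {
    use core::arch::x86_64::*;

    let a = _mm512_set1_ps(7.0);     // sixteen f32 lanes, all 7.0
    let src = _mm512_set1_epi32(-1); // merge source for the writemask form
    let k: __mmask16 = 0b0000_0000_1111_1111; // select the low eight lanes

    // writemask: low eight lanes become 7, high eight lanes keep -1 from `src`
    let merged = _mm512_mask_cvtps_epi32(src, k, a);
    // zeromask: low eight lanes become 7, high eight lanes become 0
    let zeroed = _mm512_maskz_cvtps_epi32(k, a);
    let _ = (merged, zeroed);
}
```

The corresponding implementations assert the same `vcvtps2dq` instruction for both variants; only what happens to the unselected lanes differs.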

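Several entries in the avx512bw and avx512f lists above, and in the avx512vbmi2 list that follows, are tagged `//need i1`. Those are masked loads and stores: unlike the register-only `_mask_`/`_maskz_` intrinsics, they cannot be built from a full-width operation plus a mask select, because lanes whose mask bit is clear must not be accessed in memory at all, and expressing that requires per-lane (`i1`) mask support further down the compiler stack. A purely illustrative scalar sketch of the intended `_mm_mask_loadu_epi16`-style semantics (the helper name and shape are ours, not the eventual implementation):

```rust
/// Illustrative stand-in only: reads just the lanes selected by `k` and merges
/// the rest from `src`. The real intrinsic is expected to lower to a single
/// masked load (`vmovdqu16` with a k-register), not a scalar loop.
unsafe fn mask_loadu_epi16_sketch(src: [i16; 8], k: u8, mem_addr: *const i16) -> [i16; 8] {
    let mut dst = src;
    for i in 0..8 {
        if (k >> i) & 1 != 0 {
            // only lanes selected by `k` ever touch memory
            dst[i] = *mem_addr.add(i);
        }
    }
    dst
}
```
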
diff --git a/crates/core_arch/avx512vbmi2.md b/crates/core_arch/avx512vbmi2.md index 4bb6a0ed0c..693af9d930 100644 --- a/crates/core_arch/avx512vbmi2.md +++ b/crates/core_arch/avx512vbmi2.md @@ -12,12 +12,12 @@ * [x] [`_mm256_maskz_compress_epi8`] * [x] [`_mm512_mask_compress_epi8`] * [x] [`_mm512_maskz_compress_epi8`] - * [_] [`_mm_mask_compressstoreu_epi16`] - * [_] [`_mm256_mask_compressstoreu_epi16`] - * [_] [`_mm512_mask_compressstoreu_epi16`] - * [_] [`_mm_mask_compressstoreu_epi8`] - * [_] [`_mm256_mask_compressstoreu_epi8`] - * [_] [`_mm512_mask_compressstoreu_epi8`] + * [_] [`_mm_mask_compressstoreu_epi16`] //need i1 + * [_] [`_mm256_mask_compressstoreu_epi16`] //need i1 + * [_] [`_mm512_mask_compressstoreu_epi16`] //need i1 + * [_] [`_mm_mask_compressstoreu_epi8`] //need i1 + * [_] [`_mm256_mask_compressstoreu_epi8`] //need i1 + * [_] [`_mm512_mask_compressstoreu_epi8`] //need i1 * [x] [`_mm_mask_expand_epi16`] * [x] [`_mm_maskz_expand_epi16`] * [x] [`_mm256_mask_expand_epi16`] @@ -30,18 +30,18 @@ * [x] [`_mm256_maskz_expand_epi8`] * [x] [`_mm512_mask_expand_epi8`] * [x] [`_mm512_maskz_expand_epi8`] - * [_] [`_mm_mask_expandloadu_epi16`] - * [_] [`_mm_maskz_expandloadu_epi16`] - * [_] [`_mm256_mask_expandloadu_epi16`] - * [_] [`_mm256_maskz_expandloadu_epi16`] - * [_] [`_mm512_mask_expandloadu_epi16`] - * [_] [`_mm512_maskz_expandloadu_epi16`] - * [_] [`_mm_mask_expandloadu_epi8`] - * [_] [`_mm_maskz_expandloadu_epi8`] - * [_] [`_mm256_mask_expandloadu_epi8`] - * [_] [`_mm256_maskz_expandloadu_epi8`] - * [_] [`_mm512_mask_expandloadu_epi8`] - * [_] [`_mm512_maskz_expandloadu_epi8`] + * [_] [`_mm_mask_expandloadu_epi16`] //need i1 + * [_] [`_mm_maskz_expandloadu_epi16`] //need i1 + * [_] [`_mm256_mask_expandloadu_epi16`] //need i1 + * [_] [`_mm256_maskz_expandloadu_epi16`] //need i1 + * [_] [`_mm512_mask_expandloadu_epi16`] //need i1 + * [_] [`_mm512_maskz_expandloadu_epi16`] //need i1 + * [_] [`_mm_mask_expandloadu_epi8`] //need i1 + * [_] [`_mm_maskz_expandloadu_epi8`] //need i1 + * [_] [`_mm256_mask_expandloadu_epi8`] //need i1 + * [_] [`_mm256_maskz_expandloadu_epi8`] //need i1 + * [_] [`_mm512_mask_expandloadu_epi8`] //need i1 + * [_] [`_mm512_maskz_expandloadu_epi8`] //need i1 * [x] [`_mm_mask_shldi_epi16`] * [x] [`_mm_maskz_shldi_epi16`] * [x] [`_mm_shldi_epi16`] diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 0e5a1ba461..f72f56a355 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -10696,6 +10696,52 @@ pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i { )) } +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtps_epi32&expand=1735) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + let convert = _mm256_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8())) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtps_epi32&expand=1736) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i { + let convert = _mm256_cvtps_epi32(a); + let zero = _mm256_setzero_si256().as_i32x8(); + transmute(simd_select_bitmask(k, convert.as_i32x8(), zero)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtps_epi32&expand=1732) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + let convert = _mm_cvtps_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtps_epi32&expand=1733) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2dq))] +pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i { + let convert = _mm_cvtps_epi32(a); + let zero = _mm_setzero_si128().as_i32x4(); + transmute(simd_select_bitmask(k, convert.as_i32x4(), zero)) +} + /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_epu32&expand=1755) @@ -10741,6 +10787,82 @@ pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i { )) } +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_epu32&expand=1752) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i { + transmute(vcvtps2udq256( + a.as_f32x8(), + _mm256_setzero_si256().as_u32x8(), + 0b11111111, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtps_epu32&expand=1753) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtps_epu32&expand=1754) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i { + transmute(vcvtps2udq256( + a.as_f32x8(), + _mm256_setzero_si256().as_u32x8(), + k, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epu32&expand=1749) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i { + transmute(vcvtps2udq128( + a.as_f32x4(), + _mm_setzero_si128().as_u32x4(), + 0b11111111, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtps_epu32&expand=1750) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtps_epu32&expand=1751) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2udq))] +pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i { + transmute(vcvtps2udq128( + a.as_f32x4(), + _mm_setzero_si128().as_u32x4(), + k, + )) +} + /// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtps_pd&expand=1769) @@ -10861,6 +10983,270 @@ pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 { )) } +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_ps&expand=1710) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 { + let convert = _mm256_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_ps&expand=1711) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 { + let convert = _mm256_cvtpd_ps(a); + let zero = _mm_setzero_ps().as_f32x4(); + transmute(simd_select_bitmask(k, convert.as_f32x4(), zero)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_ps&expand=1707) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 { + let convert = _mm_cvtpd_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_ps&expand=1708) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2ps))] +pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 { + let convert = _mm_cvtpd_ps(a); + let zero = _mm_setzero_ps().as_f32x4(); + transmute(simd_select_bitmask(k, convert.as_f32x4(), zero)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_epi32&expand=1675) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i { + transmute(vcvtpd2dq( + a.as_f64x8(), + _mm256_setzero_si256().as_i32x8(), + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_epi32&expand=1676) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + transmute(vcvtpd2dq( + a.as_f64x8(), + src.as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_epi32&expand=1677) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i { + transmute(vcvtpd2dq( + a.as_f64x8(), + _mm256_setzero_si256().as_i32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_epi32&expand=1673) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + let convert = _mm256_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_epi32&expand=1674) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i { + let convert = _mm256_cvtpd_epi32(a); + transmute(simd_select_bitmask( + k, + convert.as_i32x4(), + _mm_setzero_si128().as_i32x4(), + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_epi32&expand=1670) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + let convert = _mm_cvtpd_epi32(a); + transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4())) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_epi32&expand=1671) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2dq))] +pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i { + let convert = _mm_cvtpd_epi32(a); + transmute(simd_select_bitmask( + k, + convert.as_i32x4(), + _mm_setzero_si128().as_i32x4(), + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_epu32&expand=1693) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i { + transmute(vcvtpd2udq( + a.as_f64x8(), + _mm256_setzero_si256().as_u32x8(), + 0b11111111, + _MM_FROUND_CUR_DIRECTION, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtpd_epu32&expand=1694) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i { + transmute(vcvtpd2udq( + a.as_f64x8(), + src.as_u32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_cvtpd_epu32&expand=1695) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i { + transmute(vcvtpd2udq( + a.as_f64x8(), + _mm256_setzero_si256().as_u32x8(), + k, + _MM_FROUND_CUR_DIRECTION, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtpd_epu32&expand=1690) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i { + transmute(vcvtpd2udq256( + a.as_f64x4(), + _mm_setzero_si128().as_u32x4(), + 0b11111111, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtpd_epu32&expand=1691) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtpd_epu32&expand=1692) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i { + transmute(vcvtpd2udq256( + a.as_f64x4(), + _mm_setzero_si128().as_u32x4(), + k, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epu32&expand=1687) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i { + transmute(vcvtpd2udq128( + a.as_f64x2(), + _mm_setzero_si128().as_u32x4(), + 0b11111111, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtpd_epu32&expand=1688) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtpd_epu32&expand=1689) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtpd2udq))] +pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i { + transmute(vcvtpd2udq128( + a.as_f64x2(), + _mm_setzero_si128().as_u32x4(), + k, + )) +} + /// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpd_pslo&expand=1715) @@ -11016,6 +11402,52 @@ pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i { transmute(simd_select_bitmask(k, convert, zero)) } +/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepi8_epi64&expand=1542) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +} + +/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepi8_epi64&expand=1543) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepi8_epi64(a).as_i64x4(); + let zero = _mm256_setzero_si256().as_i64x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepi8_epi64&expand=1539) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepi8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +} + +/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepi8_epi64&expand=1540) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsxbq))] +pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepi8_epi64(a).as_i64x2(); + let zero = _mm_setzero_si128().as_i64x2(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi32&expand=1621) @@ -11050,6 +11482,52 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i { transmute(simd_select_bitmask(k, convert, zero)) } +/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi32&expand=1619) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +} + +/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu8_epi32(a).as_i32x8(); + let zero = _mm256_setzero_si256().as_i32x8(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi32&expand=1616) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu8_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +} + +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbd))] +pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu8_epi32(a).as_i32x4(); + let zero = _mm_setzero_si128().as_i32x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Zero extend packed unsigned 8-bit integers in the low 8 byte sof a to packed 64-bit integers, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu8_epi64&expand=1630) @@ -11085,6 +11563,52 @@ pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i { transmute(simd_select_bitmask(k, convert, zero)) } +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu8_epi64&expand=1628) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +} + +/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi64&expand=1629) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu8_epi64(a).as_i64x4(); + let zero = _mm256_setzero_si256().as_i64x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu8_epi64&expand=1625) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu8_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +} + +/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi64&expand=1626) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxbq))] +pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu8_epi64(a).as_i64x2(); + let zero = _mm_setzero_si128().as_i64x2(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi16_epi32&expand=1389) @@ -11279,6 +11803,52 @@ pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i { transmute(simd_select_bitmask(k, convert, zero)) } +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_epi32&expand=1551) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); + transmute(simd_select_bitmask(k, convert, src.as_i32x8())) +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_epi32&expand=1552) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu16_epi32(a).as_i32x8(); + let zero = _mm256_setzero_si256().as_i32x8(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_epi32&expand=1548) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu16_epi32(a).as_i32x4(); + transmute(simd_select_bitmask(k, convert, src.as_i32x4())) +} + +/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_epi32&expand=1549) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwd))] +pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu16_epi32(a).as_i32x4(); + let zero = _mm_setzero_si128().as_i32x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst. 
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepu16_epi64&expand=1562) @@ -11313,6 +11883,52 @@ pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i { transmute(simd_select_bitmask(k, convert, zero)) } +/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu16_epi64&expand=1560) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +} + +/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu16_epi64&expand=1561) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu16_epi64(a).as_i64x4(); + let zero = _mm256_setzero_si256().as_i64x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu16_epi64&expand=1557) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu16_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +} + +/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu16_epi64&expand=1558) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxwq))] +pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu16_epi64(a).as_i64x2(); + let zero = _mm_setzero_si128().as_i64x2(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst. 
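// Illustrative usage sketch (not part of this patch): mask bit i always selects destination
// lane i for these widening conversions, and the epu16 -> epi64 form only reads the low four
// words of `a`. `demo_mask_cvtepu16` is a hypothetical helper assuming avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_mask_cvtepu16() {
    // Element 0 is the first argument of _mm_setr_epi16.
    let a = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
    let src = _mm256_set1_epi32(-1);
    // 0b1000_0001 keeps only lanes 0 and 7 of the 8-lane epu16 -> epi32 result.
    let r = _mm256_mask_cvtepu16_epi32(src, 0b1000_0001, a);
    assert_eq!(
        core::mem::transmute::<__m256i, [i32; 8]>(r),
        [10, -1, -1, -1, -1, -1, -1, 80]
    );
    // The epu16 -> epi64 form produces four lanes; mask bits 4..7 are simply ignored.
    let r64 = _mm256_maskz_cvtepu16_epi64(0b0011, a);
    assert_eq!(core::mem::transmute::<__m256i, [i64; 4]>(r64), [10, 20, 0, 0]);
}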
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_epi64&expand=1428) @@ -11427,6 +12043,52 @@ pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i { transmute(simd_select_bitmask(k, convert, zero)) } +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_epi64&expand=1569) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); + transmute(simd_select_bitmask(k, convert, src.as_i64x4())) +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_epi64&expand=1570) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i { + let convert = _mm256_cvtepu32_epi64(a).as_i64x4(); + let zero = _mm256_setzero_si256().as_i64x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_epi64&expand=1566) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu32_epi64(a).as_i64x2(); + transmute(simd_select_bitmask(k, convert, src.as_i64x2())) +} + +/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_epi64&expand=1567) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovzxdq))] +pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i { + let convert = _mm_cvtepu32_epi64(a).as_i64x2(); + let zero = _mm_setzero_si128().as_i64x2(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32_ps&expand=1455) @@ -11655,6 +12317,75 @@ pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d { transmute(simd_select_bitmask(k, convert, zero)) } +/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtepu32_pd&expand=1577) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2pd))] +pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d { + let a = a.as_u32x4(); + transmute::<f64x4, _>(simd_cast(a)) +} + +/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtepu32_pd&expand=1578) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2pd))] +pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d { + let convert = _mm256_cvtepu32_pd(a).as_f64x4(); + transmute(simd_select_bitmask(k, convert, src.as_f64x4())) +} + +/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtepu32_pd&expand=1579) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2pd))] +pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d { + let convert = _mm256_cvtepu32_pd(a).as_f64x4(); + let zero = _mm256_setzero_pd().as_f64x4(); + transmute(simd_select_bitmask(k, convert, zero)) +} + +/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_pd&expand=1574) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2pd))] +pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d { + let a = a.as_u32x4(); + let u64: u32x2 = simd_shuffle2(a, a, [0, 1]); + transmute::<f64x2, _>(simd_cast(u64)) +} + +/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtepu32_pd&expand=1575) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2pd))] +pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d { + let convert = _mm_cvtepu32_pd(a).as_f64x2(); + transmute(simd_select_bitmask(k, convert, src.as_f64x2())) +} + +/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
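// Illustrative sketch (not part of this patch): the point of vcvtudq2pd is that the source
// lanes are treated as unsigned, so the bit pattern 0xFFFF_FFFF becomes 4294967295.0 rather
// than -1.0. `demo_cvtepu32_pd` is a hypothetical helper assuming avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtepu32_pd() {
    let a = _mm_set1_epi32(-1); // bit pattern 0xFFFF_FFFF, i.e. u32::MAX
    let r = _mm256_cvtepu32_pd(a);
    assert_eq!(
        core::mem::transmute::<__m256d, [f64; 4]>(r),
        [4294967295.0; 4]
    );
    // The masked form keeps `src` lanes where the corresponding mask bit is clear.
    let src = _mm256_set1_pd(0.5);
    let r = _mm256_mask_cvtepu32_pd(src, 0b0011, a);
    assert_eq!(
        core::mem::transmute::<__m256d, [f64; 4]>(r),
        [4294967295.0, 4294967295.0, 0.5, 0.5]
    );
}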
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtepu32_pd&expand=1576) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtudq2pd))] +pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d { + let convert = _mm_cvtepu32_pd(a).as_f64x2(); + let zero = _mm_setzero_pd().as_f64x2(); + transmute(simd_select_bitmask(k, convert, zero)) +} + /// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtepi32lo_pd&expand=1464) @@ -12249,6 +12980,74 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i )) } +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi32_epi16&expand=1816) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i { + transmute(vpmovsdw256( + a.as_i32x8(), + _mm_setzero_si128().as_i16x8(), + 0b11111111, + )) +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_epi16&expand=1817) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi32_epi16&expand=1818) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsdw256(a.as_i32x8(), _mm_setzero_si128().as_i16x8(), k)) +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi32_epi16&expand=1813) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i { + transmute(vpmovsdw128( + a.as_i32x4(), + _mm_setzero_si128().as_i16x8(), + 0b11111111, + )) +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_epi16&expand=1814) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi32_epi16&expand=1815) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k)) +} + /// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi32_epi8&expand=1828) @@ -12283,6 +13082,74 @@ pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i { transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k)) } +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi32_epi8&expand=1825) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i { + transmute(vpmovsdb256( + a.as_i32x8(), + _mm_setzero_si128().as_i8x16(), + 0b11111111, + )) +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_epi8&expand=1826) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi32_epi8&expand=1827) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k)) +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. 
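// Illustrative sketch (not part of this patch): the `cvtsepi*` down-converts saturate instead
// of truncating, and a 256-bit source fills only the low half of the 128-bit result (the rest
// is zeroed). `demo_cvtsepi32_narrow` is a hypothetical helper assuming avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtsepi32_narrow() {
    let a = _mm256_setr_epi32(1, -1, 40_000, -40_000, 0, 0, 0, 0);
    // 40_000 does not fit in an i16, so it clamps to i16::MAX; -40_000 clamps to i16::MIN.
    let r = _mm256_cvtsepi32_epi16(a);
    assert_eq!(
        core::mem::transmute::<__m128i, [i16; 8]>(r),
        [1, -1, 32767, -32768, 0, 0, 0, 0]
    );
    // The epi8 form clamps to the i8 range instead and uses only the low 8 bytes of dst.
    let r = _mm256_cvtsepi32_epi8(a);
    let bytes = core::mem::transmute::<__m128i, [i8; 16]>(r);
    assert_eq!(&bytes[..8], &[1i8, -1, 127, -128, 0, 0, 0, 0]);
}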
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi32_epi8&expand=1822) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i { + transmute(vpmovsdb128( + a.as_i32x4(), + _mm_setzero_si128().as_i8x16(), + 0b11111111, + )) +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_epi8&expand=1823) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi32_epi8&expand=1824) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k)) +} + /// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi32&expand=1852) @@ -12317,6 +13184,74 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i { transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k)) } +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi64_epi32&expand=1849) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i { + transmute(vpmovsqd256( + a.as_i64x4(), + _mm_setzero_si128().as_i32x4(), + 0b11111111, + )) +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_epi32&expand=1850) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi64_epi32&expand=1851) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsqd256(a.as_i64x4(), _mm_setzero_si128().as_i32x4(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi64_epi32&expand=1846) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i { + transmute(vpmovsqd128( + a.as_i64x2(), + _mm_setzero_si128().as_i32x4(), + 0b11111111, + )) +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_epi32&expand=1847) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi64_epi32&expand=1848) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k)) +} + /// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi16&expand=1843) @@ -12351,6 +13286,74 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i { transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k)) } +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi64_epi16&expand=1840) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i { + transmute(vpmovsqw256( + a.as_i64x4(), + _mm_setzero_si128().as_i16x8(), + 0b11111111, + )) +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
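// Illustrative sketch (not part of this patch): with a 128-bit source there are only two
// 64-bit lanes, so _mm_cvtsepi64_epi32 fills just the low two i32 lanes of the result and
// zeroes the rest. `demo_cvtsepi64_epi32` is a hypothetical helper assuming avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtsepi64_epi32() {
    let a = _mm_set_epi64x(i64::MAX, -1); // element 0 is the *second* argument
    let r = _mm_cvtsepi64_epi32(a);
    // -1 is representable as i32; i64::MAX saturates to i32::MAX; upper lanes are zero.
    assert_eq!(
        core::mem::transmute::<__m128i, [i32; 4]>(r),
        [-1, i32::MAX, 0, 0]
    );
}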
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_epi16&expand=1841) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi64_epi16&expand=1842) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi64_epi16&expand=1837) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i { + transmute(vpmovsqw128( + a.as_i64x2(), + _mm_setzero_si128().as_i16x8(), + 0b11111111, + )) +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_epi16&expand=1838) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi64_epi16&expand=1839) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k)) +} + /// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsepi64_epi8&expand=1861) @@ -12385,6 +13388,74 @@ pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k)) } +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtsepi64_epi8&expand=1858) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i { + transmute(vpmovsqb256( + a.as_i64x4(), + _mm_setzero_si128().as_i8x16(), + 0b11111111, + )) +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_epi8&expand=1859) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtsepi64_epi8&expand=1860) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovsqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsepi64_epi8&expand=1855) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i { + transmute(vpmovsqb128( + a.as_i64x2(), + _mm_setzero_si128().as_i8x16(), + 0b11111111, + )) +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_epi8&expand=1856) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtsepi64_epi8&expand=1857) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovsqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k)) +} + /// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. 
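// Illustrative sketch (not part of this patch): the 64-to-16 and 64-to-8 saturating narrows
// behave the same way, just with tighter clamping ranges, and the masked forms apply the
// mask per source lane. `demo_cvtsepi64_narrow` is hypothetical; assumes avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtsepi64_narrow() {
    let a = _mm256_setr_epi64x(100, -100, 1i64 << 40, -(1i64 << 40));
    let r16 = _mm256_cvtsepi64_epi16(a);
    assert_eq!(
        core::mem::transmute::<__m128i, [i16; 8]>(r16),
        [100, -100, 32767, -32768, 0, 0, 0, 0]
    );
    // Zeromask 0b0111 drops lane 3 before the i8 clamp is stored.
    let r8 = _mm256_maskz_cvtsepi64_epi8(0b0111, a);
    let bytes = core::mem::transmute::<__m128i, [i8; 16]>(r8);
    assert_eq!(&bytes[..4], &[100i8, -100, 127, 0]);
}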
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi32_epi16&expand=2054) @@ -12423,6 +13494,82 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i )) } +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi32_epi16&expand=2051) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i { + transmute(vpmovusdw256( + a.as_u32x8(), + _mm_setzero_si128().as_u16x8(), + 0b11111111, + )) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_epi16&expand=2052) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi32_epi16&expand=2053) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusdw256( + a.as_u32x8(), + _mm_setzero_si128().as_u16x8(), + k, + )) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi32_epi16&expand=2048) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i { + transmute(vpmovusdw128( + a.as_u32x4(), + _mm_setzero_si128().as_u16x8(), + 0b11111111, + )) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_epi16&expand=2049) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi32_epi16&expand=2050) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusdw128( + a.as_u32x4(), + _mm_setzero_si128().as_u16x8(), + k, + )) +} + /// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi32_epi8&expand=2063) @@ -12457,6 +13604,82 @@ pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k)) } +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi32_epi8&expand=2060) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i { + transmute(vpmovusdb256( + a.as_u32x8(), + _mm_setzero_si128().as_u8x16(), + 0b11111111, + )) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_epi8&expand=2061) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi32_epi8&expand=2062) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusdb256( + a.as_u32x8(), + _mm_setzero_si128().as_u8x16(), + k, + )) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi32_epi8&expand=2057) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i { + transmute(vpmovusdb128( + a.as_u32x4(), + _mm_setzero_si128().as_u8x16(), + 0b11111111, + )) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_epi8&expand=2058) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi32_epi8&expand=2059) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusdb128( + a.as_u32x4(), + _mm_setzero_si128().as_u8x16(), + k, + )) +} + /// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi32&expand=2087) @@ -12495,6 +13718,82 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i )) } +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi64_epi32&expand=2084) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i { + transmute(vpmovusqd256( + a.as_u64x4(), + _mm_setzero_si128().as_u32x4(), + 0b11111111, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_epi32&expand=2085) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi64_epi32&expand=2086) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusqd256( + a.as_u64x4(), + _mm_setzero_si128().as_u32x4(), + k, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst. 
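// Illustrative sketch (not part of this patch): the `cvtusepi*` forms saturate on the
// *unsigned* range, so anything above u16::MAX (or u8::MAX) clamps to all-ones rather than
// wrapping. `demo_cvtusepi32_narrow` is a hypothetical helper assuming avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtusepi32_narrow() {
    let a = _mm256_setr_epi32(1, 65_535, 100_000, 0, 0, 0, 0, 0);
    let r = _mm256_cvtusepi32_epi16(a);
    assert_eq!(
        core::mem::transmute::<__m128i, [u16; 8]>(r),
        [1, 65_535, 65_535, 0, 0, 0, 0, 0]
    );
    // The 128-bit epu8 form: 300 and 70_000 both clamp to 255, lane 3 is masked off.
    let r = _mm_maskz_cvtusepi32_epi8(0b0111, _mm_setr_epi32(1, 300, 70_000, 9));
    let bytes = core::mem::transmute::<__m128i, [u8; 16]>(r);
    assert_eq!(&bytes[..4], &[1u8, 255, 255, 0]);
}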
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi64_epi32&expand=2081) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i { + transmute(vpmovusqd128( + a.as_u64x2(), + _mm_setzero_si128().as_u32x4(), + 0b11111111, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_epi32&expand=2082) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi64_epi32&expand=2083) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusqd128( + a.as_u64x2(), + _mm_setzero_si128().as_u32x4(), + k, + )) +} + /// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi16&expand=2078) @@ -12529,6 +13828,82 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k)) } +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi64_epi16&expand=2075) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i { + transmute(vpmovusqw256( + a.as_u64x4(), + _mm_setzero_si128().as_u16x8(), + 0b11111111, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_epi16&expand=2076) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi64_epi16&expand=2077) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusqw256( + a.as_u64x4(), + _mm_setzero_si128().as_u16x8(), + k, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi64_epi16&expand=2072) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i { + transmute(vpmovusqw128( + a.as_u64x2(), + _mm_setzero_si128().as_u16x8(), + 0b11111111, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_epi16&expand=2073) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi64_epi16&expand=2074) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusqw128( + a.as_u64x2(), + _mm_setzero_si128().as_u16x8(), + k, + )) +} + /// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtusepi64_epi8&expand=2096) @@ -12563,6 +13938,82 @@ pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i { transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k)) } +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtusepi64_epi8&expand=2093) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i { + transmute(vpmovusqb256( + a.as_u64x4(), + _mm_setzero_si128().as_u8x16(), + 0b11111111, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_epi8&expand=2094) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtusepi64_epi8&expand=2095) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i { + transmute(vpmovusqb256( + a.as_u64x4(), + _mm_setzero_si128().as_u8x16(), + k, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtusepi64_epi8&expand=2090) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i { + transmute(vpmovusqb128( + a.as_u64x2(), + _mm_setzero_si128().as_u8x16(), + 0b11111111, + )) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_epi8&expand=2091) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) +} + +/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtusepi64_epi8&expand=2092) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i { + transmute(vpmovusqb128( + a.as_u64x2(), + _mm_setzero_si128().as_u8x16(), + k, + )) +} + /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst. 
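// Illustrative sketch (not part of this patch): the same idea for 64-bit sources; a negative
// i64 bit pattern is a huge unsigned value and therefore clamps to the destination's
// all-ones. `demo_cvtusepi64_narrow` is a hypothetical helper assuming avx512f+avx512vl.
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn demo_cvtusepi64_narrow() {
    let a = _mm256_setr_epi64x(7, -1, 1i64 << 33, 0);
    let r = _mm256_cvtusepi64_epi32(a);
    assert_eq!(
        core::mem::transmute::<__m128i, [u32; 4]>(r),
        [7, u32::MAX, u32::MAX, 0]
    );
    // Writemask: lanes 1 and 3 come from `src` (all-ones words), lanes 0 and 2 are converted.
    let r = _mm256_mask_cvtusepi64_epi16(_mm_set1_epi16(-1), 0b0101, a);
    let words = core::mem::transmute::<__m128i, [u16; 8]>(r);
    assert_eq!(&words[..4], &[7u16, u16::MAX, u16::MAX, u16::MAX]);
}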
/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: @@ -13271,8 +14722,7 @@ pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ -/// -/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of: /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ @@ -13282,28 +14732,27 @@ pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvt_roundps_ph&expand=1352) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] #[rustc_args_required_const(3)] pub unsafe fn _mm256_mask_cvt_roundps_ph( src: __m128i, k: __mmask8, a: __m256, - sae: i32, + imm8: i32, ) -> __m128i { let a = a.as_f32x8(); let src = src.as_i16x8(); macro_rules! call { - ($imm4:expr) => { - vcvtps2ph256(a, $imm4, src, k) + ($imm8:expr) => { + vcvtps2ph256(a, $imm8, src, k) }; } - let r = constify_imm4_round!(sae, call); + let r = constify_imm8_sae!(imm8, call); transmute(r) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ -/// -/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ @@ -13313,23 +14762,22 @@ pub unsafe fn _mm256_mask_cvt_roundps_ph( /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvt_roundps_ph&expand=1353) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] #[rustc_args_required_const(2)] -pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, sae: i32) -> __m128i { +pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, imm8: i32) -> __m128i { let a = a.as_f32x8(); let zero = _mm_setzero_si128().as_i16x8(); macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph256(a, $imm4, zero, k) + ($imm8:expr) => { + vcvtps2ph256(a, $imm8, zero, k) }; } - let r = constify_imm4_round!(sae, call); + let r = constify_imm8_sae!(imm8, call); transmute(r) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ -/// -/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ @@ -13339,23 +14787,22 @@ pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, sae: i32) -> _ /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvt_roundps_ph&expand=1350) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] #[rustc_args_required_const(3)] -pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, sae: i32) -> __m128i { +pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, imm8: i32) -> __m128i { let a = a.as_f32x4(); let src = src.as_i16x8(); macro_rules! call { - ($imm4:expr) => { - vcvtps2ph128(a, $imm4, src, k) + ($imm8:expr) => { + vcvtps2ph128(a, $imm8, src, k) }; } - let r = constify_imm4_round!(sae, call); + let r = constify_imm8_sae!(imm8, call); transmute(r) } /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ -/// -/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ /// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ @@ -13365,17 +14812,17 @@ pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, sae: /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvt_roundps_ph&expand=1351) #[inline] #[target_feature(enable = "avx512f,avx512vl")] -#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] #[rustc_args_required_const(2)] -pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, sae: i32) -> __m128i { +pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i { let a = a.as_f32x4(); let zero = _mm_setzero_si128().as_i16x8(); macro_rules! 
call { - ($imm4:expr) => { - vcvtps2ph128(a, $imm4, zero, k) + ($imm8:expr) => { + vcvtps2ph128(a, $imm8, zero, k) }; } - let r = constify_imm4_round!(sae, call); + let r = constify_imm8_sae!(imm8, call); transmute(r) } @@ -13439,6 +14886,106 @@ pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256 transmute(r) } +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// _MM_FROUND_TO_NEAREST_INT // round to nearest\ +/// _MM_FROUND_TO_NEG_INF // round down\ +/// _MM_FROUND_TO_POS_INF // round up\ +/// _MM_FROUND_TO_ZERO // truncate\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtps_ph&expand=1776) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm256_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m256, imm8: i32) -> __m128i { + let a = a.as_f32x8(); + let src = src.as_i16x8(); + macro_rules! call { + ($imm8:expr) => { + vcvtps2ph256(a, $imm8, src, k) + }; + } + let r = constify_imm8_sae!(imm8, call); + transmute(r) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// _MM_FROUND_TO_NEAREST_INT // round to nearest\ +/// _MM_FROUND_TO_NEG_INF // round down\ +/// _MM_FROUND_TO_POS_INF // round up\ +/// _MM_FROUND_TO_ZERO // truncate\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtps_ph&expand=1777) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256, imm8: i32) -> __m128i { + let a = a.as_f32x8(); + let zero = _mm_setzero_si128().as_i16x8(); + macro_rules! 
call { + ($imm8:expr) => { + vcvtps2ph256(a, $imm8, zero, k) + }; + } + let r = constify_imm8_sae!(imm8, call); + transmute(r) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// _MM_FROUND_TO_NEAREST_INT // round to nearest\ +/// _MM_FROUND_TO_NEG_INF // round down\ +/// _MM_FROUND_TO_POS_INF // round up\ +/// _MM_FROUND_TO_ZERO // truncate\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtps_ph&expand=1773) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m128, imm8: i32) -> __m128i { + let a = a.as_f32x4(); + let src = src.as_i16x8(); + macro_rules! call { + ($imm8:expr) => { + vcvtps2ph128(a, $imm8, src, k) + }; + } + let r = constify_imm8_sae!(imm8, call); + transmute(r) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ +/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\ +/// _MM_FROUND_TO_NEAREST_INT // round to nearest\ +/// _MM_FROUND_TO_NEG_INF // round down\ +/// _MM_FROUND_TO_POS_INF // round up\ +/// _MM_FROUND_TO_ZERO // truncate\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtps_ph&expand=1774) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i { + let a = a.as_f32x4(); + let zero = _mm_setzero_si128().as_i16x8(); + macro_rules! call { + ($imm8:expr) => { + vcvtps2ph128(a, $imm8, zero, k) + }; + } + let r = constify_imm8_sae!(imm8, call); + transmute(r) +} + /// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. /// @@ -13549,6 +15096,52 @@ pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 { )) } +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
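// A hedged usage sketch for the masked f32 -> f16 conversions added above, assuming a nightly
// toolchain where these AVX512F/AVX512VL intrinsics are available and a CPU that supports them.
// The helper name, mask and lane values are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mask_cvtps_ph(a: __m256) -> __m128i {
    let src = _mm_setzero_si128();
    // Lanes whose mask bit is set receive the converted half-precision bits; the remaining
    // 16-bit lanes of the result are copied from `src`.
    _mm256_mask_cvtps_ph(src, 0b1111_0000, a, _MM_FROUND_TO_NEAREST_INT)
}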
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtph_ps&expand=1721) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 { + let convert = _mm256_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8())) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvtph_ps&expand=1722) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 { + let convert = _mm256_cvtph_ps(a); + let zero = _mm256_setzero_ps().as_f32x8(); + transmute(simd_select_bitmask(k, convert.as_f32x8(), zero)) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtph_ps&expand=1718) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 { + let convert = _mm_cvtph_ps(a); + transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4())) +} + +/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvtph_ps&expand=1719) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvtph2ps))] +pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 { + let convert = _mm_cvtph_ps(a); + let zero = _mm_setzero_ps().as_f32x4(); + transmute(simd_select_bitmask(k, convert.as_f32x4(), zero)) +} + /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. /// @@ -13624,7 +15217,7 @@ pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) #[rustc_args_required_const(1)] pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i { let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); + let zero = _mm512_setzero_si512().as_u32x16(); macro_rules! call { ($imm4:expr) => { vcvttps2udq(a, zero, 0b11111111_11111111, $imm4) @@ -13649,7 +15242,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32( sae: i32, ) -> __m512i { let a = a.as_f32x16(); - let src = src.as_i32x16(); + let src = src.as_u32x16(); macro_rules! 
call { ($imm4:expr) => { vcvttps2udq(a, src, k, $imm4) @@ -13669,7 +15262,7 @@ pub unsafe fn _mm512_mask_cvtt_roundps_epu32( #[rustc_args_required_const(2)] pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i { let a = a.as_f32x16(); - let zero = _mm512_setzero_si512().as_i32x16(); + let zero = _mm512_setzero_si512().as_u32x16(); macro_rules! call { ($imm4:expr) => { vcvttps2udq(a, zero, k, $imm4) @@ -13834,6 +15427,54 @@ pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { )) } +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttps_epi32&expand=1982) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttps_epi32&expand=1983) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i { + transmute(vcvttps2dq256( + a.as_f32x8(), + _mm256_setzero_si256().as_i32x8(), + k, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttps_epi32&expand=1979) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttps_epi32&expand=1980) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2dq))] +pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i { + transmute(vcvttps2dq128( + a.as_f32x4(), + _mm_setzero_si128().as_i32x4(), + k, + )) +} + /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. 
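// A hedged sketch of the masked truncating f32 -> i32 conversions above (vcvttps2dq); the
// zeromask variant zeroes every lane whose mask bit is clear. Helper name and values are
// illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_maskz_cvttps_epi32() -> __m128i {
    // Low-to-high lanes: 0.7, 1.1, -2.5, 3.9 truncate toward zero to 0, 1, -2, 3.
    let a = _mm_set_ps(3.9, -2.5, 1.1, 0.7);
    // Mask 0b0111 keeps lanes 0..=2 and zeroes lane 3.
    _mm_maskz_cvttps_epi32(0b0000_0111, a)
}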
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttps_epu32&expand=2002) @@ -13843,7 +15484,7 @@ pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i { pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i { transmute(vcvttps2udq( a.as_f32x16(), - _mm512_setzero_si512().as_i32x16(), + _mm512_setzero_si512().as_u32x16(), 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION, )) @@ -13858,7 +15499,7 @@ pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i { pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i { transmute(vcvttps2udq( a.as_f32x16(), - src.as_i32x16(), + src.as_u32x16(), k, _MM_FROUND_CUR_DIRECTION, )) @@ -13873,12 +15514,88 @@ pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i { transmute(vcvttps2udq( a.as_f32x16(), - _mm512_setzero_si512().as_i32x16(), + _mm512_setzero_si512().as_u32x16(), k, _MM_FROUND_CUR_DIRECTION, )) } +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttps_epu32&expand=1999) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2udq))] +pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i { + transmute(vcvttps2udq256( + a.as_f32x8(), + _mm256_setzero_si256().as_u32x8(), + 0b11111111, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttps_epu32&expand=2000) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2udq))] +pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i { + transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttps_epu32&expand=2001) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2udq))] +pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i { + transmute(vcvttps2udq256( + a.as_f32x8(), + _mm256_setzero_si256().as_u32x8(), + k, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
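// A hedged sketch of the unsigned truncating conversions above (vcvttps2udq). Negative or
// out-of-range inputs have no meaningful unsigned result, so a writemask is a convenient way
// to convert only lanes known to be in range. Helper name and values are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mask_cvttps_epu32(src: __m256i) -> __m256i {
    let a = _mm256_set1_ps(7.9);
    // Selected lanes become 7u32; lanes with a clear mask bit keep the value from `src`.
    _mm256_mask_cvttps_epu32(src, 0b0011_1111, a)
}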
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epu32&expand=1996) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2udq))] +pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i { + transmute(vcvttps2udq128( + a.as_f32x4(), + _mm_setzero_si128().as_u32x4(), + 0b11111111, + )) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttps_epu32&expand=1997) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2udq))] +pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i { + transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) +} + +/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttps_epu32&expand=1998) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttps2udq))] +pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i { + transmute(vcvttps2udq128( + a.as_f32x4(), + _mm_setzero_si128().as_u32x4(), + k, + )) +} + /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. /// @@ -13944,6 +15661,54 @@ pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i { )) } +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttpd_epi32&expand=1945) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttpd_epi32&expand=1946) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i { + transmute(vcvttpd2dq256( + a.as_f64x4(), + _mm_setzero_si128().as_i32x4(), + k, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttpd_epi32&expand=1942) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttpd_epi32&expand=1943) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2dq))] +pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i { + transmute(vcvttpd2dq128( + a.as_f64x2(), + _mm_setzero_si128().as_i32x4(), + k, + )) +} + /// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvttpd_epu32&expand=1965) @@ -13989,6 +15754,82 @@ pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i { )) } +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvttpd_epu32&expand=1962) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i { + transmute(vcvttpd2udq256( + a.as_f64x4(), + _mm_setzero_si128().as_i32x4(), + 0b11111111, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvttpd_epu32&expand=1963) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i { + transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
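// A hedged sketch of the masked f64 -> i32 truncations above (vcvttpd2dq): four doubles
// narrow into the low four 32-bit lanes of a 128-bit result. Helper name and values are
// illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_maskz_cvttpd_epi32() -> __m128i {
    // Low-to-high lanes: 4.9, -3.9, 2.9, -1.9 truncate toward zero to 4, -3, 2, -1.
    let a = _mm256_set_pd(-1.9, 2.9, -3.9, 4.9);
    _mm256_maskz_cvttpd_epi32(0b0000_1111, a)
}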
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_cvttpd_epu32&expand=1964) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i { + transmute(vcvttpd2udq256( + a.as_f64x4(), + _mm_setzero_si128().as_i32x4(), + k, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epu32&expand=1959) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i { + transmute(vcvttpd2udq128( + a.as_f64x2(), + _mm_setzero_si128().as_i32x4(), + 0b11111111, + )) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvttpd_epu32&expand=1960) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i { + transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) +} + +/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_cvttpd_epu32&expand=1961) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vcvttpd2udq))] +pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i { + transmute(vcvttpd2udq128( + a.as_f64x2(), + _mm_setzero_si128().as_i32x4(), + k, + )) +} + /// Returns vector of type `__m512d` with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_pd&expand=5018) @@ -23876,6 +25717,17 @@ pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d { transmute(a) } +/// Copy the lower 32-bit integer in a to dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtsi512_si32&expand=1882) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(vmovd))] +pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 { + let extract: i32 = simd_extract(a.as_i32x16(), 0); + transmute(extract) +} + /// Broadcast the low packed 32-bit integer from a to all elements of dst. 
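// A hedged sketch of _mm512_cvtsi512_si32 added above: it simply returns the lowest 32-bit
// lane of the vector. Helper name and value are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvtsi512_si32() -> i32 {
    let v = _mm512_set1_epi32(42);
    _mm512_cvtsi512_si32(v) // 42
}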
/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastd_epi32&expand=545) @@ -29901,6 +31753,66 @@ pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: vpmovdwmem128(mem_addr as *mut i8, a.as_i32x4(), k); } +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovsdwmem(mem_addr as *mut i8, a.as_i32x16(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsdwmem256(mem_addr as *mut i8, a.as_i32x8(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdw))] +pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsdwmem128(mem_addr as *mut i8, a.as_i32x4(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovusdwmem(mem_addr as *mut i8, a.as_i32x16(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
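// A hedged sketch of the saturating down-convert stores above (vpmovsdw): each selected
// 32-bit lane is signed-saturated to 16 bits and written to memory, while unselected
// elements of the destination are left untouched. Helper name, buffer and values are
// illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mask_cvtsepi32_storeu_epi16(out: &mut [i16; 4]) {
    // Low-to-high lanes: -5, 5, -70_000, 70_000 saturate to -5, 5, -32768, 32767.
    let a = _mm_set_epi32(70_000, -70_000, 5, -5);
    _mm_mask_cvtsepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b0000_1111, a);
}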
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusdwmem256(mem_addr as *mut i8, a.as_i32x8(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdw))] +pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovusdwmem128(mem_addr as *mut i8, a.as_i32x4(), k); +} + /// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463) @@ -29931,6 +31843,66 @@ pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: _ vpmovdbmem128(mem_addr as *mut i8, a.as_i32x4(), k); } +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovsdbmem(mem_addr as *mut i8, a.as_i32x16(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsdbmem256(mem_addr as *mut i8, a.as_i32x8(), k); +} + +/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsdb))] +pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsdbmem128(mem_addr as *mut i8, a.as_i32x4(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) { + vpmovusdbmem(mem_addr as *mut i8, a.as_i32x16(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusdbmem256(mem_addr as *mut i8, a.as_i32x8(), k); +} + +/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusdb))] +pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovusdbmem128(mem_addr as *mut i8, a.as_i32x4(), k); +} + /// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513) @@ -29961,6 +31933,66 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: vpmovqwmem128(mem_addr as *mut i8, a.as_i64x2(), k); } +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
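// A hedged sketch of the unsigned-saturating byte stores above (vpmovusdb): each selected
// lane is clamped to 0..=255 before being written. Helper name, buffer and values are
// illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mask_cvtusepi32_storeu_epi8(out: &mut [u8; 4]) {
    // Low-to-high lanes: 7, 255, 300, 1 clamp to 7, 255, 255, 1.
    let a = _mm_set_epi32(1, 300, 255, 7);
    _mm_mask_cvtusepi32_storeu_epi8(out.as_mut_ptr() as *mut i8, 0b0000_1111, a);
}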
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovsqwmem(mem_addr as *mut i8, a.as_i64x8(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsqwmem256(mem_addr as *mut i8, a.as_i64x4(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqw))] +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsqwmem128(mem_addr as *mut i8, a.as_i64x2(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovusqwmem(mem_addr as *mut i8, a.as_i64x8(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusqwmem256(mem_addr as *mut i8, a.as_i64x4(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqw))] +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovusqwmem128(mem_addr as *mut i8, a.as_i64x2(), k); +} + /// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519) @@ -29991,6 +32023,66 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: _ vpmovqbmem128(mem_addr as *mut i8, a.as_i64x2(), k); } +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovsqbmem(mem_addr as *mut i8, a.as_i64x8(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsqbmem256(mem_addr as *mut i8, a.as_i64x4(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqb))] +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsqbmem128(mem_addr as *mut i8, a.as_i64x2(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
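// A hedged sketch of the 64-bit -> 16-bit saturating stores above (vpmovsqw): four i64
// lanes are signed-saturated to i16 and only masked-on lanes are written. Helper name,
// buffer and values are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mask_cvtsepi64_storeu_epi16(out: &mut [i16; 4]) {
    // Every lane is far above i16::MAX, so each selected element is stored as 32767.
    let a = _mm256_set1_epi64x(1_i64 << 40);
    _mm256_mask_cvtsepi64_storeu_epi16(out.as_mut_ptr() as *mut i8, 0b0000_1111, a);
}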
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovusqbmem(mem_addr as *mut i8, a.as_i64x8(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusqbmem256(mem_addr as *mut i8, a.as_i64x4(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqb))] +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovusqbmem128(mem_addr as *mut i8, a.as_i64x2(), k); +} + ///Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516) @@ -30021,6 +32113,66 @@ pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: vpmovqdmem128(mem_addr as *mut i8, a.as_i64x2(), k); } +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovsqdmem(mem_addr as *mut i8, a.as_i64x8(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. 
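// A hedged sketch of the 64-bit -> 8-bit unsigned-saturating stores above (vpmovusqb): the
// two u64 lanes are clamped to 0..=255 and only masked-on lanes are written. Helper name,
// buffer and values are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn sketch_mask_cvtusepi64_storeu_epi8(out: &mut [u8; 2]) {
    // Low-to-high lanes: 1_000, 9 clamp to 255, 9.
    let a = _mm_set_epi64x(9, 1_000);
    _mm_mask_cvtusepi64_storeu_epi8(out.as_mut_ptr() as *mut i8, 0b0000_0011, a);
}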
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovsqdmem256(mem_addr as *mut i8, a.as_i64x4(), k); +} + +/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovsqd))] +pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovsqdmem128(mem_addr as *mut i8, a.as_i64x2(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) { + vpmovusqdmem(mem_addr as *mut i8, a.as_i64x8(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) { + vpmovusqdmem256(mem_addr as *mut i8, a.as_i64x4(), k); +} + +/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102) +#[inline] +#[target_feature(enable = "avx512f,avx512vl")] +#[cfg_attr(test, assert_instr(vpmovusqd))] +pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) { + vpmovusqdmem128(mem_addr as *mut i8, a.as_i64x2(), k); +} + /// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_epi32&expand=5628) @@ -35275,8 +37427,8 @@ pub unsafe fn _mm_maskz_fixupimm_sd( } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. 
imm8 is used to set the required flags reporting.\ -/// /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_ss&expand=2511) #[inline] #[target_feature(enable = "avx512f")] @@ -35304,8 +37456,8 @@ pub unsafe fn _mm_fixupimm_round_ss( } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ -/// /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_ss&expand=2512) #[inline] #[target_feature(enable = "avx512f")] @@ -35334,8 +37486,8 @@ pub unsafe fn _mm_mask_fixupimm_round_ss( } /// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\ -/// /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_ss&expand=2513) #[inline] #[target_feature(enable = "avx512f")] @@ -35364,8 +37516,8 @@ pub unsafe fn _mm_maskz_fixupimm_round_ss( } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ -/// /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_fixupimm_round_sd&expand=2508) #[inline] #[target_feature(enable = "avx512f")] @@ -35393,8 +37545,8 @@ pub unsafe fn _mm_fixupimm_round_sd( } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ -/// /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_fixupimm_round_sd&expand=2509) #[inline] #[target_feature(enable = "avx512f")] @@ -35423,8 +37575,8 @@ pub unsafe fn _mm_mask_fixupimm_round_sd( } /// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\ -/// /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_maskz_fixupimm_round_sd&expand=2510) #[inline] #[target_feature(enable = "avx512f")] @@ -35588,13 +37740,12 @@ pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: transmute(r) } -/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. -/// -/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: -/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions -/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions -/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions -/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions +/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvt_roundsd_ss&expand=1361) @@ -35615,13 +37766,12 @@ pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 transmute(r) } -/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. 
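// A hedged sketch of the explicit-rounding scalar conversion documented in this hunk: the
// rounding argument must be one of the _MM_FROUND_* combinations listed above. Helper name
// and arguments are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvt_roundsd_ss(a: __m128, b: __m128d) -> __m128 {
    // Truncate the low f64 of `b` to f32 and suppress exceptions; the upper three f32 lanes
    // are copied from `a`.
    _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)
}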
-/// -/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of: -/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions -/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions -/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions -/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions +/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ /// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_mask_cvt_roundsd_ss&expand=1362) @@ -35649,7 +37799,6 @@ pub unsafe fn _mm_mask_cvt_roundsd_ss( } /// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -35681,7 +37830,6 @@ pub unsafe fn _mm_maskz_cvt_roundsd_ss( } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -35706,7 +37854,6 @@ pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 { } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -35731,7 +37878,6 @@ pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 { } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress 
exceptions\ @@ -35776,7 +37922,6 @@ pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 { } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -35801,7 +37946,6 @@ pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 { } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -35826,7 +37970,6 @@ pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 { } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -35921,7 +38064,6 @@ pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 { } /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ -/// /// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ /// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ /// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ @@ -36028,7 +38170,7 @@ pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 { /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_i32&expand=2022) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i32&expand=2022) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtss2si))] @@ -36038,7 +38180,7 @@ pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttss_u32&expand=2026) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u32&expand=2026) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtss2usi))] @@ -36049,7 +38191,7 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
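// A hedged sketch of the scalar float -> i32 conversions with an explicit rounding mode
// documented above. Helper name and values are illustrative only.
use core::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn sketch_cvt_roundss_si32() -> i32 {
    let a = _mm_set_ss(2.5);
    // Round toward negative infinity and suppress exceptions: 2.5 -> 2.
    _mm_cvt_roundss_si32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC)
}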
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_si32&expand=1930) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si32&expand=1930) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] @@ -36068,7 +38210,7 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 { /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtt_roundsd_i32&expand=1928) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i32&expand=1928) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] @@ -36105,7 +38247,7 @@ pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 { /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_i32&expand=2015) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i32&expand=2015) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtsd2si))] @@ -36115,7 +38257,7 @@ pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvttsd_u32&expand=2020) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u32&expand=2020) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtsd2usi))] @@ -36125,7 +38267,7 @@ pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 { /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_ss&expand=2032) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_ss&expand=2032) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtusi2ss))] @@ -36137,7 +38279,7 @@ pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 { /// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
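// Illustrative sketch, not part of the patch: the unsigned scalar conversions documented
// above, written in the same test style as the rest of this file. The operand values are
// assumptions chosen for the example.
#[simd_test(enable = "avx512f")]
unsafe fn example_unsigned_scalar_conversions() {
    // _mm_cvtu32_sd converts the unsigned integer into the lower lane and keeps the
    // upper lane from `a`.
    let a = _mm_set_pd(1., -1.5);
    let r = _mm_cvtu32_sd(a, 9);
    assert_eq_m128d(r, _mm_set_pd(1., 9.));
    // _mm_cvttsd_u32 truncates the lower lane toward zero.
    let t = _mm_cvttsd_u32(_mm_set_pd(0., 3.9));
    assert_eq!(t, 3);
}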
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu32_sd&expand=2031) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sd&expand=2031) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcvtusi2sd))] @@ -36147,34 +38289,10 @@ pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d { transmute(r) } -/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_ss&expand=2035) -#[inline] -#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss -pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { - let b = b as f32; - let r = simd_insert(a, 0, b); - transmute(r) -} - -/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. -/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvtu64_sd&expand=2034) -#[inline] -#[target_feature(enable = "avx512f")] -#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd -pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { - let b = b as f64; - let r = simd_insert(a, 0, b); - transmute(r) -} - /// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. /// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_ss&expand=1175) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_ss&expand=1175) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] //should be vcomiss @@ -36194,7 +38312,7 @@ pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> i3 /// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\ /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
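// Illustrative sketch, not part of the patch: passing a comparison predicate and sae value
// to the comi_round intrinsics documented above. The predicate _CMP_EQ_OQ (ordered,
// non-signaling equality, value 0) and the operands are assumptions chosen for the example.
#[simd_test(enable = "avx512f")]
unsafe fn example_comi_round_sd() {
    let a = _mm_set1_pd(2.2);
    let b = _mm_set1_pd(2.2);
    // Equal lower elements with an equality predicate: the boolean result is 1.
    let r = _mm_comi_round_sd(a, b, _CMP_EQ_OQ, _MM_FROUND_CUR_DIRECTION);
    assert_eq!(r, 1);
}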
/// -/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_comi_round_sd&expand=1174) +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sd&expand=1174) #[inline] #[target_feature(enable = "avx512f")] #[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))] //should be vcomisd @@ -36723,16 +38841,29 @@ extern "C" { #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"] fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"] fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16; + #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"] + fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"] + fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"] fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8; #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"] fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8; + #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"] fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"] fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"] + fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"] + fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"] fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16; #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"] @@ -36750,12 +38881,31 @@ extern "C" { #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"] fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"] + fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"] + fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"] - fn vcvttps2udq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> u32x16; + fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16; + #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"] + fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"] + fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"] fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"] + fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"] + fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"] fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"] + fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"] + fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4; #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"] fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8; @@ -36782,6 +38932,20 @@ extern "C" { #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"] fn 
vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"] + fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"] + fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"] + fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"] + fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"] + fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"] + fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"] fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16); #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"] @@ -36789,18 +38953,62 @@ extern "C" { #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"] fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"] + fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"] + fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"] + fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"] + fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16); + #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"] + fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"] + fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"] fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8); #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"] fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8); #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"] fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"] + fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"] + fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"] + fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"] + fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"] + fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"] + fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"] fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8); #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"] fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8); #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"] fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"] + fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"] + fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"] + fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = 
"llvm.x86.avx512.mask.pmovus.qb.mem.512"] + fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"] + fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"] + fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"] fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8); #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"] @@ -36808,29 +39016,92 @@ extern "C" { #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"] fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"] + fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"] + fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"] + fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"] + fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"] + fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"] + fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8); + #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"] fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16; #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"] fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"] + fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"] + fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"] fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"] + fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"] + fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"] fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"] + fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"] + fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4; + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"] fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"] + fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"] + fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8; + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"] fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"] + fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"] + fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"] fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"] + fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"] + fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"] fn vpmovusdb(a: u32x16, src: u8x16, 
mask: u16) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"] + fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"] + fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"] fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"] + fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"] + fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4; + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"] fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"] + fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"] + fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8; + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"] fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"] + fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16; + #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"] + fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16; #[link_name = "llvm.x86.avx512.gather.dpd.512"] fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8; @@ -37208,31 +39479,21 @@ extern "C" { #[link_name = "llvm.x86.avx512.vcvtss2si32"] fn vcvtss2si(a: f32x4, rounding: i32) -> i32; - #[link_name = "llvm.x86.avx512.vcvtss2si64"] - fn vcvtss2si64(a: f32x4, rounding: i32) -> i64; #[link_name = "llvm.x86.avx512.vcvtss2usi32"] fn vcvtss2usi(a: f32x4, rounding: i32) -> u32; - #[link_name = "llvm.x86.avx512.vcvtss2usi64"] - fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64; + #[link_name = "llvm.x86.avx512.vcvtsd2si32"] fn vcvtsd2si(a: f64x2, rounding: i32) -> i32; - #[link_name = "llvm.x86.avx512.vcvtsd2si64"] - fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64; #[link_name = "llvm.x86.avx512.vcvtsd2usi32"] fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32; - #[link_name = "llvm.x86.avx512.vcvtsd2usi64"] - fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64; #[link_name = "llvm.x86.avx512.cvtsi2ss32"] fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4; - #[link_name = "llvm.x86.avx512.cvtsi2ss64"] - fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4; #[link_name = "llvm.x86.avx512.cvtsi2sd64"] fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.cvtusi2ss"] fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4; - #[link_name = "llvm.x86.avx512.cvtusi642ss"] - fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4; #[link_name = "llvm.x86.avx512.cvtusi642sd"] fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2; @@ -40778,23 +43039,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, ); assert_eq_m512(r, e); } @@ -40807,6 +43057,7 @@ mod tests { let r = _mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
assert_eq_m512(r, _mm512_setzero_ps()); + #[rustfmt::skip] let r = _mm512_maskz_fmadd_round_ps( 0b00000000_11111111, a, @@ -40814,23 +43065,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - 0., - 0., - 0., - 0., - 0., - 0., - 0., - 0., + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0., 0., 0., 0., + 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -40850,23 +43090,12 @@ mod tests { 0b00000000_11111111, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -1., - -1., - -1., - -1., - -1., - -1., - -1., - -1., + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -1., -1., -1., -1., + -1., -1., -1., -1., ); assert_eq_m512(r, e); } @@ -40899,23 +43128,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, ); assert_eq_m512(r, e); } @@ -40935,23 +43153,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - 0., - 0., - 0., - 0., - 0., - 0., - 0., - 0., + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 0., 0., 0., 0., + 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -40971,23 +43178,12 @@ mod tests { 0b00000000_11111111, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - -0.99999994, - 1., - 1., - 1., - 1., - 1., - 1., - 1., - 1., + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + -0.99999994, -0.99999994, -0.99999994, -0.99999994, + 1., 1., 1., 1., + 1., 1., 1., 1., ); assert_eq_m512(r, e); } @@ -40998,23 +43194,12 @@ mod tests { let b = _mm512_set1_ps(1.); let c = _mm512_set1_ps(-1.); let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + #[rustfmt::skip] let e = _mm512_setr_ps( - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, ); assert_eq_m512(r, e); let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -41045,23 +43230,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = 
_mm512_setr_ps( - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, ); assert_eq_m512(r, e); } @@ -41086,23 +43260,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 0., - 0., - 0., - 0., - 0., - 0., - 0., - 0., + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 0., 0., 0., 0., + 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -41127,23 +43290,12 @@ mod tests { 0b00000000_11111111, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - -1., - -1., - -1., - -1., - -1., - -1., - -1., - -1., + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + 1.0000001, -0.99999994, 1.0000001, -0.99999994, + -1., -1., -1., -1., + -1., -1., -1., -1., ); assert_eq_m512(r, e); } @@ -41154,23 +43306,12 @@ mod tests { let b = _mm512_set1_ps(1.); let c = _mm512_set1_ps(-1.); let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, ); assert_eq_m512(r, e); let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); @@ -41201,23 +43342,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, - 0.00000007, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, + 0.00000007, 0.00000007, 0.00000007, 0.00000007, ); assert_eq_m512(r, e); } @@ -41242,23 +43372,12 @@ mod tests { c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - 0., - 0., - 0., - 0., - 0., - 0., - 0., - 0., + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + 0., 0., 0., 0., + 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -41283,23 +43402,12 @@ mod tests { 0b00000000_11111111, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -0.99999994, - 1.0000001, - -1., - -1., - -1., - -1., - -1., - -1., - -1., - -1., + -0.99999994, 
1.0000001, -0.99999994, 1.0000001, + -0.99999994, 1.0000001, -0.99999994, 1.0000001, + -1., -1., -1., -1., + -1., -1., -1., -1., ); assert_eq_m512(r, e); } @@ -41688,23 +43796,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_fixupimm_round_ps() { + #[rustfmt::skip] let a = _mm512_set_ps( - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - 1., - 1., - 1., - 1., - 1., - 1., - 1., - 1., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + 1., 1., 1., 1., + 1., 1., 1., 1., ); let b = _mm512_set1_ps(f32::MAX); let c = _mm512_set1_epi32(i32::MAX); @@ -41724,23 +43821,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_fixupimm_round_ps() { + #[rustfmt::skip] let a = _mm512_set_ps( - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - f32::NAN, - 1., - 1., - 1., - 1., - 1., - 1., - 1., - 1., + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + f32::NAN, f32::NAN, f32::NAN, f32::NAN, + 1., 1., 1., 1., + 1., 1., 1., 1., ); let b = _mm512_set1_ps(f32::MAX); let c = _mm512_set1_epi32(i32::MAX); @@ -41856,6 +43942,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvtps_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvtps_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtps_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtps_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtps_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvtps_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtps_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtps_epu32() { let a = _mm512_setr_ps( @@ -41891,6 +44019,64 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_cvtps_epu32(a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvtps_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvtps_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtps_epu32(0b11111111, a); + let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_cvtps_epu32(a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtps_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtps_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvtps_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtps_epu32(0b00001111, a); + let e = _mm_set_epi32(12, 14, 14, 16); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtepi8_epi32() { let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -41991,6 +44177,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi32(-1); + let r = _mm256_mask_cvtepu8_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu8_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi32(-1); + let r = _mm_mask_cvtepu8_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu8_epi32() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu8_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu8_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtepi16_epi32() { let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -42091,6 +44319,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi32(-1); + let r = _mm256_mask_cvtepu16_epi32(src, 0, a); + assert_eq_m256i(r, src); + let 
r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu16_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi32(-1); + let r = _mm_mask_cvtepu16_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu16_epi32() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu16_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu16_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtepi32_ps() { let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -42379,285 +44649,294 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtsepi32_epi16() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, ); let r = _mm512_cvtsepi32_epi16(a); + #[rustfmt::skip] let e = _mm256_set_epi16( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i16::MIN, - i16::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i16::MIN, i16::MAX, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_cvtsepi32_epi16() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, ); let src = _mm256_set1_epi16(-1); let r = _mm512_mask_cvtsepi32_epi16(src, 0, a); assert_eq_m256i(r, src); let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a); + #[rustfmt::skip] let e = _mm256_set_epi16( - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - 8, - 9, - 10, - 11, - 12, - 13, - i16::MIN, - i16::MAX, + -1, -1, -1, -1, + -1, -1, -1, -1, + 8, 9, 10, 11, + 12, 13, i16::MIN, i16::MAX, ); assert_eq_m256i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtsepi32_epi16() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, ); let r = _mm512_maskz_cvtsepi32_epi16(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a); + #[rustfmt::skip] let e = _mm256_set_epi16( - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 8, - 9, - 10, - 11, - 12, - 13, - i16::MIN, - i16::MAX, + 0, 0, 0, 0, + 0, 0, 0, 0, + 8, 9, 10, 11, + 12, 13, i16::MIN, i16::MAX, ); assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi32_epi16() { + let a = 
_mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_cvtsepi32_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let src = _mm_set1_epi16(-1); + let r = _mm256_mask_cvtsepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi32_epi16() { + let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm256_maskz_cvtsepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_cvtsepi32_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtsepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi32_epi16() { + let a = _mm_set_epi32(4, 5, 6, 7); + let r = _mm_maskz_cvtsepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtsepi32_epi8() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, ); let r = _mm512_cvtsepi32_epi8(a); + #[rustfmt::skip] let e = _mm_set_epi8( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i8::MIN, - i8::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i8::MIN, i8::MAX, ); assert_eq_m128i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_cvtsepi32_epi8() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, ); let src = _mm_set1_epi8(-1); let r = _mm512_mask_cvtsepi32_epi8(src, 0, a); assert_eq_m128i(r, src); let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a); + #[rustfmt::skip] let e = _mm_set_epi8( - -1, - -1, - -1, - -1, - -1, - -1, - -1, - -1, - 8, - 9, - 10, - 11, - 12, - 13, - i8::MIN, - i8::MAX, + -1, -1, -1, -1, + -1, -1, -1, -1, + 8, 9, 10, 11, + 12, 13, i8::MIN, i8::MAX, ); assert_eq_m128i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtsepi32_epi8() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MAX, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MAX, ); let r = _mm512_maskz_cvtsepi32_epi8(0, a); assert_eq_m128i(r, _mm_setzero_si128()); let r = 
_mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a); + #[rustfmt::skip] let e = _mm_set_epi8( - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 8, - 9, - 10, - 11, - 12, - 13, - i8::MIN, - i8::MAX, + 0, 0, 0, 0, + 0, 0, 0, 0, + 8, 9, 10, 11, + 12, 13, i8::MIN, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi32_epi8() { + let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_cvtsepi32_epi8(a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_epi8() { + let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtsepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi32_epi8() { + let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm256_maskz_cvtsepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 9, 10, 11, 12, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi32_epi8() { + let a = _mm_set_epi32(13, 14, 15, 16); + let r = _mm_cvtsepi32_epi8(a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_epi8() { + let a = _mm_set_epi32(13, 14, 15, 16); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtsepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 13, 14, 15, 16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi32_epi8() { + let a = _mm_set_epi32(13, 14, 15, 16); + let r = _mm_maskz_cvtsepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 13, 14, 15, 16, ); assert_eq_m128i(r, e); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtusepi32_epi16() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MIN, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, ); let r = _mm512_cvtusepi32_epi16(a); let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1); @@ -42666,23 +44945,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_cvtusepi32_epi16() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MIN, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, ); let src = _mm256_set1_epi16(-1); let r = _mm512_mask_cvtusepi32_epi16(src, 0, a); @@ -42694,23 +44962,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn 
test_mm512_maskz_cvtusepi32_epi16() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MIN, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, ); let r = _mm512_maskz_cvtusepi32_epi16(0, a); assert_eq_m256i(r, _mm256_setzero_si256()); @@ -42719,25 +44976,72 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi32_epi16() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtusepi32_epi16(a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_epi16() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtusepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi32_epi16() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_cvtusepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a); + let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi32_epi16() { + let a = _mm_set_epi32(5, 6, 7, 8); + let r = _mm_cvtusepi32_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_epi16() { + let a = _mm_set_epi32(5, 6, 7, 8); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtusepi32_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi32_epi16() { + let a = _mm_set_epi32(5, 6, 7, 8); + let r = _mm_maskz_cvtusepi32_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtusepi32_epi8() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MIN, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, ); let r = _mm512_cvtusepi32_epi8(a); let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1); @@ -42746,23 +45050,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_cvtusepi32_epi8() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MIN, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, ); let src = _mm_set1_epi8(-1); let r = _mm512_mask_cvtusepi32_epi8(src, 0, a); @@ -42774,23 +45067,12 @@ mod tests { #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtusepi32_epi8() { + #[rustfmt::skip] let a = _mm512_set_epi32( - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - i32::MIN, - i32::MIN, + 0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, i32::MIN, i32::MIN, ); let r = 
_mm512_maskz_cvtusepi32_epi8(0, a); assert_eq_m128i(r, _mm_setzero_si128()); @@ -42799,6 +45081,64 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi32_epi8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX); + let r = _mm256_cvtusepi32_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_epi8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtusepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi32_epi8() { + let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX); + let r = _mm256_maskz_cvtusepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi32_epi8() { + let a = _mm_set_epi32(5, 6, 7, i32::MAX); + let r = _mm_cvtusepi32_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_epi8() { + let a = _mm_set_epi32(5, 6, 7, i32::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtusepi32_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi32_epi8() { + let a = _mm_set_epi32(5, 6, 7, i32::MAX); + let r = _mm_maskz_cvtusepi32_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundps_epi32() { let a = _mm512_setr_ps( @@ -42944,23 +45284,12 @@ mod tests { unsafe fn test_mm512_cvt_roundepu32_ps() { let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16); let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + #[rustfmt::skip] let e = _mm512_setr_ps( - 0., - 4294967300., - 2., - 4294967300., - 4., - 4294967300., - 6., - 4294967300., - 8., - 10., - 10., - 12., - 12., - 14., - 14., - 16., + 0., 4294967300., 2., 4294967300., + 4., 4294967300., 6., 4294967300., + 8., 10., 10., 12., + 12., 14., 14., 16., ); assert_eq_m512(r, e); } @@ -42978,23 +45307,12 @@ mod tests { a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = _mm512_setr_ps( - 0., - 4294967300., - 2., - 4294967300., - 4., - 4294967300., - 6., - 4294967300., - 0., - 0., - 0., - 0., - 0., - 0., - 0., - 0., + 0., 4294967300., 2., 4294967300., + 4., 4294967300., 6., 4294967300., + 0., 0., 0., 0., + 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -43009,23 +45327,12 @@ mod tests { a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, ); + #[rustfmt::skip] let e = 
_mm512_setr_ps( - 0., - 4294967300., - 2., - 4294967300., - 4., - 4294967300., - 6., - 4294967300., - 0., - 0., - 0., - 0., - 0., - 0., - 0., - 0., + 0., 4294967300., 2., 4294967300., + 4., 4294967300., 6., 4294967300., + 0., 0., 0., 0., + 0., 0., 0., 0., ); assert_eq_m512(r, e); } @@ -43140,6 +45447,48 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtps_ph() { + let a = _mm256_set1_ps(1.); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtps_ph(src, 0b11111111, a, _MM_FROUND_NO_EXC); + let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtps_ph() { + let a = _mm256_set1_ps(1.); + let r = _mm256_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtps_ph(0b11111111, a, _MM_FROUND_NO_EXC); + let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtps_ph() { + let a = _mm_set1_ps(1.); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtps_ph(src, 0b00001111, a, _MM_FROUND_NO_EXC); + let e = _mm_setr_epi64x(4323521613979991040, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtps_ph() { + let a = _mm_set1_ps(1.); + let r = _mm_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtps_ph(0b00001111, a, _MM_FROUND_NO_EXC); + let e = _mm_setr_epi64x(4323521613979991040, 0); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvt_roundph_ps() { let a = _mm256_setr_epi64x( @@ -43236,6 +45585,48 @@ mod tests { assert_eq_m512(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let src = _mm256_set1_ps(0.); + let r = _mm256_mask_cvtph_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm256_mask_cvtph_ps(src, 0b11111111, a); + let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let r = _mm256_maskz_cvtph_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm256_maskz_cvtph_ps(0b11111111, a); + let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let src = _mm_set1_ps(0.); + let r = _mm_mask_cvtph_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_cvtph_ps(src, 0b00001111, a); + let e = _mm_setr_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtph_ps() { + let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040); + let r = _mm_maskz_cvtph_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_cvtph_ps(0b00001111, a); + let e = _mm_setr_ps(1., 1., 1., 1.); + assert_eq_m128(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn 
test_mm512_cvtt_roundps_epi32() { let a = _mm512_setr_ps( @@ -43341,6 +45732,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvttps_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttps_epi32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvttps_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvttps_epi32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttps_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttps_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttps_epi32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_maskz_cvttps_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttps_epi32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvttps_epu32() { let a = _mm512_setr_ps( @@ -43376,6 +45809,64 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvttps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_cvttps_epu32(a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let src = _mm256_set1_epi32(0); + let r = _mm256_mask_cvttps_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttps_epu32() { + let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5); + let r = _mm256_maskz_cvttps_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvttps_epu32(0b11111111, a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvttps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let r = _mm_cvttps_epu32(a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttps_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttps_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttps_epu32() { + let a = _mm_set_ps(12., 13.5, 14., 15.5); + 
let r = _mm_maskz_cvttps_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttps_epu32(0b00001111, a); + let e = _mm_set_epi32(12, 13, 14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_i32gather_ps() { let mut arr = [0f32; 256]; @@ -49496,6 +51987,69 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(i16::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm256_set1_epi16(u16::MAX as i16); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16( + 0, + 0, + 0, + 0, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + ); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() { let a = _mm512_set1_epi32(9); @@ -49523,6 +52077,84 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + 
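+        // The store converts each i32 lane to i8 with signed saturation, so every
+        // i32::MAX lane clamps to i8::MAX; only the low 4 bytes of `r` are written,
+        // leaving the remaining 12 bytes at their zero initialization.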
_mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() { + let a = _mm512_set1_epi32(i32::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a); + let e = _mm_set1_epi8(u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() { + let a = _mm256_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() { + let a = _mm_set1_epi32(i32::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_storeu_epi32() { let a = _mm512_set1_epi32(9); @@ -52720,24 +55352,6 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cvtu64_ss() { - let a = _mm_set_ps(0., -0.5, 1., -1.5); - let b: u64 = 9; - let r = _mm_cvtu64_ss(a, b); - let e = _mm_set_ps(0., -0.5, 1., 9.); - assert_eq_m128(r, e); - } - - #[simd_test(enable = "avx512f")] - unsafe fn test_mm_cvtu64_sd() { - let a = _mm_set_pd(1., -1.5); - let b: u64 = 9; - let r = _mm_cvtu64_sd(a, b); - let e = _mm_set_pd(1., 9.); - assert_eq_m128d(r, e); - } - #[simd_test(enable = "avx512f")] unsafe fn test_mm_comi_round_ss() { let a = _mm_set1_ps(2.2); @@ -52755,4 +55369,12 @@ mod tests { let e: i32 = 0; assert_eq!(r, e); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsi512_si32() { + let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_cvtsi512_si32(a); + let e: i32 = 1; + assert_eq!(r, e); + } } diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index 29854d858b..cf1b4b6220 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -1,8 +1,561 @@ -//use crate::{ -// -// core_arch::{simd::*, simd_llvm::*, x86::*}, -// mem::transmute, -//}; +use crate::{ + core_arch::{simd::*, simd_llvm::*, x86::*, x86_64::*}, + mem::transmute, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i64&expand=1792) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2si))] +pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 { + _mm_cvtsd_si64(a) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst. 
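+/// The conversion uses the current MXCSR rounding mode (round-to-nearest-even unless changed).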
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i64&expand=1894) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2si))] +pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 { + _mm_cvtss_si64(a) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u64&expand=1902) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2usi))] +pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 { + transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u64&expand=1800) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2usi))] +pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 { + transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)) +} + +/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_ss&expand=1643) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsi2ss))] +pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 { + let b = b as f32; + let r = simd_insert(a, 0, b); + transmute(r) +} + +/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sd&expand=1644) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsi2sd))] +pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d { + let b = b as f64; + let r = simd_insert(a, 0, b); + transmute(r) +} + +/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2ss +pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 { + let b = b as f32; + let r = simd_insert(a, 0, b); + transmute(r) +} + +/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. 
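+/// Note: the current implementation is a plain `u64`-to-`f64` cast, which is why the
+/// test asserts a `mov` rather than `vcvtusi2sd` (see the `assert_instr` annotation below).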
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(mov))] // should be vcvtusi2sd +pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { + let b = b as f64; + let r = simd_insert(a, 0, b); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i64&expand=2016) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2si))] +pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { + transmute(vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u64&expand=2021) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2usi))] +pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { + transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION)) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=#text=_mm_cvttss_i64&expand=2023) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2si))] +pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { + transmute(vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u64&expand=2027) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2usi))] +pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { + transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION)) +} + +/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_cvt_roundi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d { + let a = a.as_f64x2(); + macro_rules! 
call { + ($imm4:expr) => { + vcvtsi2sd64(a, b, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsi2sd, rounding = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_cvt_roundsi64_sd(a: __m128d, b: i64, rounding: i32) -> __m128d { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtsi2sd64(a, b, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_cvt_roundi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { + let a = a.as_f32x4(); + macro_rules! 
call { + ($imm4:expr) => { + vcvtsi2ss64(a, b, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtusi2sd, rounding = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_cvt_roundu64_sd(a: __m128d, b: u64, rounding: i32) -> __m128d { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtusi2sd64(a, b, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_cvt_roundsi64_ss(a: __m128, b: i64, rounding: i32) -> __m128 { + let a = a.as_f32x4(); + macro_rules! 
call { + ($imm4:expr) => { + vcvtsi2ss64(a, b, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm_cvt_roundu64_ss(a: __m128, b: u64, rounding: i32) -> __m128 { + let a = a.as_f32x4(); + macro_rules! call { + ($imm4:expr) => { + vcvtusi2ss64(a, b, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvt_roundsd_si64(a: __m128d, rounding: i32) -> i64 { + let a = a.as_f64x2(); + macro_rules! 
call { + ($imm4:expr) => { + vcvtsd2si64(a, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvt_roundsd_i64(a: __m128d, rounding: i32) -> i64 { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtsd2si64(a, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvt_roundsd_u64(a: __m128d, rounding: i32) -> u64 { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtsd2usi64(a, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvt_roundss_si64(a: __m128, rounding: i32) -> i64 { + let a = a.as_f32x4(); + macro_rules! 
call { + ($imm4:expr) => { + vcvtss2si64(a, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvt_roundss_i64(a: __m128, rounding: i32) -> i64 { + let a = a.as_f32x4(); + macro_rules! call { + ($imm4:expr) => { + vcvtss2si64(a, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\ +/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\ +/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\ +/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\ +/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\ +/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\ +/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvt_roundss_u64(a: __m128, rounding: i32) -> u64 { + let a = a.as_f32x4(); + macro_rules! call { + ($imm4:expr) => { + vcvtss2usi64(a, $imm4) + }; + } + let r = constify_imm4_round!(rounding, call); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d, sae: i32) -> i64 { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtsd2si64(a, $imm4) + }; + } + let r = constify_imm4_sae!(sae, call); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2si, sae = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d, sae: i32) -> i64 { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtsd2si64(a, $imm4) + }; + } + let r = constify_imm4_sae!(sae, call); + transmute(r) +} + +/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtsd2usi, sae = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d, sae: i32) -> u64 { + let a = a.as_f64x2(); + macro_rules! call { + ($imm4:expr) => { + vcvtsd2usi64(a, $imm4) + }; + } + let r = constify_imm4_sae!(sae, call); + transmute(r) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvtt_roundss_i64(a: __m128, sae: i32) -> i64 { + let a = a.as_f32x4(); + macro_rules! call { + ($imm4:expr) => { + vcvtss2si64(a, $imm4) + }; + } + let r = constify_imm4_sae!(sae, call); + transmute(r) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2si, sae = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvtt_roundss_si64(a: __m128, sae: i32) -> i64 { + let a = a.as_f32x4(); + macro_rules! call { + ($imm4:expr) => { + vcvtss2si64(a, $imm4) + }; + } + let r = constify_imm4_sae!(sae, call); + transmute(r) +} + +/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ +/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vcvtss2usi, sae = 8))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm_cvtt_roundss_u64(a: __m128, sae: i32) -> u64 { + let a = a.as_f32x4(); + macro_rules! 
call { + ($imm4:expr) => { + vcvtss2usi64(a, $imm4) + }; + } + let r = constify_imm4_sae!(sae, call); + transmute(r) +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.avx512.vcvtss2si64"] + fn vcvtss2si64(a: f32x4, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512.vcvtss2usi64"] + fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64; + #[link_name = "llvm.x86.avx512.vcvtsd2si64"] + fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64; + #[link_name = "llvm.x86.avx512.vcvtsd2usi64"] + fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64; + + #[link_name = "llvm.x86.avx512.cvtsi2ss64"] + fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.cvtsi2sd64"] + fn vcvtsi2sd64(a: f64x2, b: i64, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.cvtusi642ss"] + fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4; + #[link_name = "llvm.x86.avx512.cvtusi642sd"] + fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2; +} #[cfg(test)] mod tests { @@ -2901,6 +3454,206 @@ mod tests { assert_eq_m256(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_ps() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_ps(0.); + let r = _mm256_mask_cvtpd_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm256_mask_cvtpd_ps(src, 0b00001111, a); + let e = _mm_set_ps(4., -5.5, 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_ps() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvtpd_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm256_maskz_cvtpd_ps(0b00001111, a); + let e = _mm_set_ps(4., -5.5, 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_ps() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_ps(0.); + let r = _mm_mask_cvtpd_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_cvtpd_ps(src, 0b00000011, a); + let e = _mm_set_ps(0., 0., 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_ps() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtpd_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_cvtpd_ps(0b00000011, a); + let e = _mm_set_ps(0., 0., 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_epi32(a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtpd_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtpd_epi32(src, 0b11111111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtpd_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtpd_epi32(0b11111111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epi32() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let 
src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtpd_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, -6, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epi32() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvtpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtpd_epi32(0b00001111, a); + let e = _mm_set_epi32(4, -6, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtpd_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtpd_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let r = _mm512_cvtpd_epu32(a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtpd_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtpd_epu32(src, 0b11111111, a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let r = _mm512_maskz_cvtpd_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtpd_epu32(0b11111111, a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_cvtpd_epu32(a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtpd_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_maskz_cvtpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtpd_epu32(0b00001111, a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_cvtpd_epu32(a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtpd_epu32(src, 0, a); + 
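+        // With an all-zero mask every lane comes from `src`, so this first assert
+        // checks pass-through; with the mask set, 7.5 rounds to 8 under the default
+        // round-to-nearest mode, giving the expected (0, 0, 6, 8).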
assert_eq_m128i(r, src); + let r = _mm_mask_cvtpd_epu32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_maskz_cvtpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtpd_epu32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtpd_pslo() { let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); @@ -2953,6 +3706,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi8_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepi8_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi8_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi8_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepi8_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi8_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtepu8_epi64() { let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -2982,26 +3777,68 @@ mod tests { assert_eq_m512i(r, e); } - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_cvtepi16_epi64() { - let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); - let r = _mm512_cvtepi16_epi64(a); - let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); - assert_eq_m512i(r, e); + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu8_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); } - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_mask_cvtepi16_epi64() { - let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); - let src = _mm512_set1_epi64(-1); - let r = _mm512_mask_cvtepi16_epi64(src, 0, a); - assert_eq_m512i(r, src); - let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a); - let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); - assert_eq_m512i(r, e); + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu8_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu8_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); } - #[simd_test(enable = "avx512f")] + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu8_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu8_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu8_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi16_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_maskz_cvtepi16_epi64() { let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); let r = _mm512_maskz_cvtepi16_epi64(0, a); @@ -3082,6 +3919,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu16_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu16_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu16_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu16_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu16_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu16_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtepi32_epi64() { 
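+        // Sign-extends each of the eight 32-bit lanes of the __m256i input to
+        // 64 bits, filling a full __m512i result.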
let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); @@ -3182,6 +4061,48 @@ mod tests { assert_eq_m512i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu32_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepu32_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu32_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu32_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepu32_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu32_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtepi32_pd() { let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); @@ -3282,6 +4203,64 @@ mod tests { assert_eq_m512d(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_cvtepu32_pd(a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_pd(-1.); + let r = _mm256_mask_cvtepu32_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_cvtepu32_pd(src, 0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepu32_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_cvtepu32_pd(0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_cvtepu32_pd(a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_pd(-1.); + let r = _mm_mask_cvtepu32_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_cvtepu32_pd(src, 0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepu32_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_cvtepu32_pd(0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + #[simd_test(enable = 
"avx512f")] unsafe fn test_mm512_cvtepi32lo_pd() { let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -3610,6 +4589,64 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi32(a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi32(-1); + let r = _mm256_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi32(a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtsepi64_epi16() { let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); @@ -3639,6 +4676,64 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi16() { + 
let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi16(src, 0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi16(0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtsepi64_epi8() { let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); @@ -3654,23 +4749,12 @@ mod tests { let r = _mm512_mask_cvtsepi64_epi8(src, 0, a); assert_eq_m128i(r, src); let r = _mm512_mask_cvtsepi64_epi8(src, 0b00001111, a); + #[rustfmt::skip] let e = _mm_set_epi8( - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - -1, - -1, - -1, - -1, - 4, - 5, - i8::MIN, - i8::MAX, + 0, 0, 0, 0, + 0, 0, 0, 0, + -1, -1, -1, -1, + 4, 5, i8::MIN, i8::MAX, ); assert_eq_m128i(r, e); } @@ -3685,6 +4769,64 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + 
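+        // With both mask bits set, the two i64 lanes saturate to i8::MIN and i8::MAX
+        // in the low two bytes; all higher bytes of the result are zeroed.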
let r = _mm_maskz_cvtsepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtusepi64_epi32() { let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); @@ -3714,6 +4856,64 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi32(a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi32(a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtusepi64_epi16() { let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); @@ -3743,6 +4943,64 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 
u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi16(src, 0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi16(0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtusepi64_epi8() { let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); @@ -3772,6 +5030,64 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvtt_roundpd_epi32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., 
-7.5); @@ -3859,6 +5175,48 @@ mod tests { assert_eq_m256i(r, e); } + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epi32() { + let a = _mm256_setr_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvttpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvttpd_epi32(src, 0b00001111, a); + let e = _mm_setr_epi32(4, -5, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epi32() { + let a = _mm256_setr_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvttpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvttpd_epi32(0b00001111, a); + let e = _mm_setr_epi32(4, -5, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttpd_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvttpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttpd_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -7); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_cvttpd_epu32() { let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); @@ -3878,14 +5236,72 @@ mod tests { assert_eq_m256i(r, e); } - #[simd_test(enable = "avx512f")] - unsafe fn test_mm512_maskz_cvttpd_epu32() { - let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); - let r = _mm512_maskz_cvttpd_epu32(0, a); - assert_eq_m256i(r, _mm256_setzero_si256()); - let r = _mm512_maskz_cvttpd_epu32(0b00001111, a); - let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); - assert_eq_m256i(r, e); + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvttpd_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvttpd_epu32(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_cvttpd_epu32(a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvttpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvttpd_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_maskz_cvttpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvttpd_epu32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_cvttpd_epu32(a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttpd_epu32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_maskz_cvttpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttpd_epu32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); } #[simd_test(enable = "avx512f")] @@ -10327,7 +11743,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() { - let a = _mm256_set1_epi32(9); + let a = _mm256_set1_epi64x(9); let mut r = _mm_set1_epi16(0); _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9); @@ -10336,13 +11752,76 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_cvtepi64_storeu_epi16() { - let a = _mm_set1_epi32(9); + let a = _mm_set1_epi64x(9); let mut r = _mm_set1_epi16(0); _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9); assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16( + 0, + 0, + 0, + 0, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn 
test_mm512_mask_cvtepi64_storeu_epi8() { let a = _mm512_set1_epi64(9); @@ -10354,7 +11833,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm256_mask_cvtepi64_storeu_epi8() { - let a = _mm256_set1_epi32(9); + let a = _mm256_set1_epi64x(9); let mut r = _mm_set1_epi8(0); _mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9); @@ -10363,13 +11842,97 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_cvtepi64_storeu_epi8() { - let a = _mm_set1_epi32(9); + let a = _mm_set1_epi64x(9); let mut r = _mm_set1_epi8(0); _mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9); assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi8() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi8() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi8() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi8() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() { let a = _mm512_set1_epi64(9); @@ -10381,7 +11944,7 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn 
test_mm256_mask_cvtepi64_storeu_epi32() { - let a = _mm256_set1_epi32(9); + let a = _mm256_set1_epi64x(9); let mut r = _mm_set1_epi32(0); _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); let e = _mm_set_epi32(9, 9, 9, 9); @@ -10390,13 +11953,67 @@ mod tests { #[simd_test(enable = "avx512f,avx512vl")] unsafe fn test_mm_mask_cvtepi64_storeu_epi32() { - let a = _mm_set1_epi32(9); + let a = _mm_set1_epi64x(9); let mut r = _mm_set1_epi16(0); _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); let e = _mm_set_epi32(0, 0, 9, 9); assert_eq_m128i(r, e); } + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm256_set1_epi32(i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a); + let e = _mm_set1_epi32(i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm256_set1_epi32(u32::MAX as i32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a); + let e = _mm_set1_epi32(u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a); + let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32); + assert_eq_m128i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_storeu_epi64() { let a = _mm512_set1_epi64(9); @@ -10721,4 +12338,254 @@ mod tests { let e = _mm_set1_epi64x(11); assert_eq_m128i(r, e); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsi64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + 
let r = _mm_cvt_roundsi64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvti64_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvti64_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_si64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_u64() { + let a = _mm_set_pd(1., f64::MAX); + let r = _mm_cvt_roundsd_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_i64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_si64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_si64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_u64(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_i64(a, _MM_FROUND_CUR_DIRECTION); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_si64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_si64(a, _MM_FROUND_CUR_DIRECTION); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_u64(a, _MM_FROUND_CUR_DIRECTION); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } 
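+
+    // The unsigned scalar conversions exercised here (`_mm_cvtsd_u64`,
+    // `_mm_cvttss_u64`, and the `_cvt_round`/`_cvtt_round` variants) are fed a
+    // lower element that cannot be represented as a `u64` (a negative value or
+    // `f64::MAX`). In that case the instruction returns the all-ones unsigned
+    // integer indefinite value (2^64 - 1), which is why these tests expect
+    // `u64::MAX`.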
+ + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_i64(a, _MM_FROUND_CUR_DIRECTION); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_si64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_si64(a, _MM_FROUND_CUR_DIRECTION); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_u64(a, _MM_FROUND_CUR_DIRECTION); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u64 = 9; + let r = _mm_cvtu64_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u64 = 9; + let r = _mm_cvtu64_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u64 = 9; + let r = _mm_cvt_roundu64_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u64 = 9; + let r = _mm_cvt_roundu64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsi64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundsi64_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } } diff --git a/crates/core_arch/src/x86_64/macros.rs b/crates/core_arch/src/x86_64/macros.rs new file mode 100644 index 0000000000..e3682d40fe --- /dev/null +++ b/crates/core_arch/src/x86_64/macros.rs @@ -0,0 +1,32 @@ +//! Utility macros. + +// For round instructions, the only valid values for rounding are 4, 8, 9, 10 and 11. +// This macro enforces that. +#[allow(unused)] +macro_rules! constify_imm4_round { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match ($imm8) & 0b1111 { + 4 => $expand!(4), + 8 => $expand!(8), + 9 => $expand!(9), + 10 => $expand!(10), + 11 => $expand!(11), + _ => panic!("Invalid round value"), + } + }; +} + +// For sae instructions, the only valid values for sae are 4 and 8. +// This macro enforces that. +#[allow(unused)] +macro_rules! 
constify_imm4_sae { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match ($imm8) & 0b1111 { + 4 => $expand!(4), + 8 => $expand!(8), + _ => panic!("Invalid sae value"), + } + }; +} diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs index c9f3bd637c..461874ece0 100644 --- a/crates/core_arch/src/x86_64/mod.rs +++ b/crates/core_arch/src/x86_64/mod.rs @@ -1,5 +1,8 @@ //! `x86_64` intrinsics +#[macro_use] +mod macros; + mod fxsr; pub use self::fxsr::*;
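Note on the new macros: the comments on `constify_imm4_round!` and `constify_imm4_sae!` only list the accepted immediates. For readers of the patch, 4 is `_MM_FROUND_CUR_DIRECTION`, and 8 through 11 are the four explicit rounding modes OR'd with `_MM_FROUND_NO_EXC` (for example, 11 is the `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC` combination used throughout the tests above). Below is a minimal, self-contained sketch of the expander-macro pattern these helpers are written for; `do_convert`, `cvt_with_rounding`, and the local `call!` macro are hypothetical stand-ins, not names from this patch.

    // Sketch only: `do_convert` stands in for the real LLVM intrinsic call,
    // which needs its rounding operand as a compile-time constant.
    fn do_convert(value: f64, _rounding_imm: i32) -> i64 {
        // Placeholder behaviour: truncate toward zero, ignoring the rounding mode.
        value as i64
    }

    // Same shape as the macro added in this patch (see x86_64/macros.rs above).
    macro_rules! constify_imm4_round {
        ($imm8:expr, $expand:ident) => {
            match ($imm8) & 0b1111 {
                4 => $expand!(4),
                8 => $expand!(8),
                9 => $expand!(9),
                10 => $expand!(10),
                11 => $expand!(11),
                _ => panic!("Invalid round value"),
            }
        };
    }

    // Hypothetical wrapper in the style of the rounding intrinsics: the local
    // `call!` macro closes over `value`, and `constify_imm4_round!` picks the
    // branch whose literal immediate matches the runtime `rounding` argument.
    fn cvt_with_rounding(value: f64, rounding: i32) -> i64 {
        macro_rules! call {
            ($imm4:expr) => {
                do_convert(value, $imm4)
            };
        }
        constify_imm4_round!(rounding, call)
    }

    fn main() {
        // 11 == _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC (3 | 8).
        assert_eq!(cvt_with_rounding(-1.5, 11), -1);
        // 4 == _MM_FROUND_CUR_DIRECTION.
        assert_eq!(cvt_with_rounding(2.5, 4), 2);
    }

The point of the pattern is that every match arm hands the expander a literal, so the immediate reaches the underlying instruction as a compile-time constant even though `rounding` is a runtime value; anything outside the documented set panics instead of silently encoding an invalid rounding field.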