From 7bfea00ffca95a3237dc3401b7b426fec9b69b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Wed, 11 Oct 2023 22:03:52 +0200 Subject: [PATCH] Fix UB in _mm_movemask_ps, _mm_movemask_pd, _mm256_movemask_ps and _mm256_movemask_pd The `simd_bitmask` intrinsic requires each element to be all-1 or all-0, while the x86 functions only check for the highest bit. --- crates/core_arch/src/x86/avx.rs | 10 ++++++++-- crates/core_arch/src/x86/sse.rs | 5 ++++- crates/core_arch/src/x86/sse2.rs | 5 ++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index 074bf744d5..de5dc05b84 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -2066,7 +2066,10 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 { #[cfg_attr(test, assert_instr(vmovmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0)); + simd_bitmask::(mask).into() } /// Sets each bit of the returned mask based on the most significant bit of the @@ -2079,7 +2082,10 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 { #[cfg_attr(test, assert_instr(vmovmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0)); + simd_bitmask::(mask).into() } /// Returns vector of type __m256d with all elements set to zero. diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 67d20512d7..6a2be09216 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1081,7 +1081,10 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { #[cfg_attr(test, assert_instr(movmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i32x4 = simd_lt(transmute(a), i32x4::splat(0)); + simd_bitmask::(mask).into() } /// Construct a `__m128` with the lowest element read from `p` and the other diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index e784c407de..7831ea7435 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2450,7 +2450,10 @@ pub unsafe fn _mm_setzero_pd() -> __m128d { #[cfg_attr(test, assert_instr(movmskpd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { - simd_bitmask::(transmute(a)).into() + // Propagate the highest bit to the rest, because simd_bitmask + // requires all-1 or all-0. + let mask: i64x2 = simd_lt(transmute(a), i64x2::splat(0)); + simd_bitmask::(mask).into() } /// Loads 128-bits (composed of 2 packed double-precision (64-bit)