Skip to content

Commit 17c96c1

Browse files
eduardosm authored and Amanieu committed
Reimplement _mm_blendv_ps and _mm256_blendv_ps without LLVM intrinsics
1 parent 2763963 commit 17c96c1

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

crates/core_arch/src/x86/avx.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -524,7 +524,8 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
524524
#[cfg_attr(test, assert_instr(vblendvps))]
525525
#[stable(feature = "simd_x86", since = "1.27.0")]
526526
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
527-
vblendvps(a, b, c)
527+
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
528+
transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
528529
}
529530

530531
/// Conditionally multiplies the packed single-precision (32-bit) floating-point
@@ -2915,8 +2916,6 @@ extern "C" {
29152916
fn roundps256(a: __m256, b: i32) -> __m256;
29162917
#[link_name = "llvm.x86.avx.sqrt.ps.256"]
29172918
fn sqrtps256(a: __m256) -> __m256;
2918-
#[link_name = "llvm.x86.avx.blendv.ps.256"]
2919-
fn vblendvps(a: __m256, b: __m256, c: __m256) -> __m256;
29202919
#[link_name = "llvm.x86.avx.dp.ps.256"]
29212920
fn vdpps(a: __m256, b: __m256, imm8: i32) -> __m256;
29222921
#[link_name = "llvm.x86.avx.hadd.pd.256"]

crates/core_arch/src/x86/sse41.rs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -118,7 +118,8 @@ pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
118118
#[cfg_attr(test, assert_instr(blendvps))]
119119
#[stable(feature = "simd_x86", since = "1.27.0")]
120120
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
121-
blendvps(a, b, mask)
121+
let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::splat(0));
122+
transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
122123
}
123124

124125
/// Blend packed double-precision (64-bit) floating-point elements from `a`
@@ -1138,8 +1139,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
11381139

11391140
#[allow(improper_ctypes)]
11401141
extern "C" {
1141-
#[link_name = "llvm.x86.sse41.blendvps"]
1142-
fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128;
11431142
#[link_name = "llvm.x86.sse41.blendpd"]
11441143
fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d;
11451144
#[link_name = "llvm.x86.sse41.blendps"]

0 commit comments

Comments
 (0)