Skip to content

Commit 2841bd4

Browse files
eduardosm authored and Amanieu committed
Reimplement _mm_blend_pd and _mm_blend_ps without LLVM intrinsics
1 parent 17c96c1 commit 2841bd4

File tree

1 file changed

+15 -6 lines changed

1 file changed

+15
-6
lines changed

crates/core_arch/src/x86/sse41.rs

Lines changed: 15 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -136,7 +136,11 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
136136
#[stable(feature = "simd_x86", since = "1.27.0")]
137137
pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
138138
static_assert_uimm_bits!(IMM2, 2);
139-
blendpd(a, b, IMM2 as u8)
139+
transmute::<f64x2, _>(simd_shuffle!(
140+
a.as_f64x2(),
141+
b.as_f64x2(),
142+
[[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
143+
))
140144
}
141145

142146
/// Blend packed single-precision (32-bit) floating-point elements from `a`
@@ -150,7 +154,16 @@ pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
150154
#[stable(feature = "simd_x86", since = "1.27.0")]
151155
pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
152156
static_assert_uimm_bits!(IMM4, 4);
153-
blendps(a, b, IMM4 as u8)
157+
transmute::<f32x4, _>(simd_shuffle!(
158+
a.as_f32x4(),
159+
b.as_f32x4(),
160+
[
161+
[0, 4][IMM4 as usize & 1],
162+
[1, 5][(IMM4 >> 1) as usize & 1],
163+
[2, 6][(IMM4 >> 2) as usize & 1],
164+
[3, 7][(IMM4 >> 3) as usize & 1],
165+
]
166+
))
154167
}
155168

156169
/// Extracts a single-precision (32-bit) floating-point element from `a`,
@@ -1139,10 +1152,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
11391152

11401153
#[allow(improper_ctypes)]
11411154
extern "C" {
1142-
#[link_name = "llvm.x86.sse41.blendpd"]
1143-
fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d;
1144-
#[link_name = "llvm.x86.sse41.blendps"]
1145-
fn blendps(a: __m128, b: __m128, imm4: u8) -> __m128;
11461155
#[link_name = "llvm.x86.sse41.insertps"]
11471156
fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
11481157
#[link_name = "llvm.x86.sse41.packusdw"]

0 commit comments

Comments
 (0)