Skip to content

Commit 4f89e42

Browse files
eduardosm authored and Amanieu committed
Reimplement _mm_addsub_ps and _mm_addsub_pd without LLVM intrinsics
1 parent 2841bd4 commit 4f89e42

File tree

1 file changed

+11 -7 lines changed

1 file changed

+11 -7 lines changed

crates/core_arch/src/x86/sse3.rs

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
//! Streaming SIMD Extensions 3 (SSE3)
22
33
use crate::{
4-
core_arch::{simd::*, simd_llvm::simd_shuffle, x86::*},
4+
core_arch::{simd::*, simd_llvm::*, x86::*},
55
mem::transmute,
66
};
77

@@ -17,7 +17,11 @@ use stdarch_test::assert_instr;
1717
#[cfg_attr(test, assert_instr(addsubps))]
1818
#[stable(feature = "simd_x86", since = "1.27.0")]
1919
pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
20-
addsubps(a, b)
20+
let a = a.as_f32x4();
21+
let b = b.as_f32x4();
22+
let add = simd_add(a, b);
23+
let sub = simd_sub(a, b);
24+
simd_shuffle!(add, sub, [4, 1, 6, 3])
2125
}
2226

2327
/// Alternatively add and subtract packed double-precision (64-bit)
@@ -29,7 +33,11 @@ pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
2933
#[cfg_attr(test, assert_instr(addsubpd))]
3034
#[stable(feature = "simd_x86", since = "1.27.0")]
3135
pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
32-
addsubpd(a, b)
36+
let a = a.as_f64x2();
37+
let b = b.as_f64x2();
38+
let add = simd_add(a, b);
39+
let sub = simd_sub(a, b);
40+
simd_shuffle!(add, sub, [2, 1])
3341
}
3442

3543
/// Horizontally adds adjacent pairs of double-precision (64-bit)
@@ -143,10 +151,6 @@ pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
143151

144152
#[allow(improper_ctypes)]
145153
extern "C" {
146-
#[link_name = "llvm.x86.sse3.addsub.ps"]
147-
fn addsubps(a: __m128, b: __m128) -> __m128;
148-
#[link_name = "llvm.x86.sse3.addsub.pd"]
149-
fn addsubpd(a: __m128d, b: __m128d) -> __m128d;
150154
#[link_name = "llvm.x86.sse3.hadd.pd"]
151155
fn haddpd(a: __m128d, b: __m128d) -> __m128d;
152156
#[link_name = "llvm.x86.sse3.hadd.ps"]

0 commit comments

Comments (0)