Skip to content

Commit 3b578c3

Browse files
eduardosm authored and Amanieu committed
Reimplement _mm256_addsub_ps and _mm256_addsub_pd without LLVM intrinsics
1 parent 4f89e42 commit 3b578c3

File tree

1 file changed

+10
-6
lines changed
  • crates/core_arch/src/x86

1 file changed

+10
-6
lines changed

crates/core_arch/src/x86/avx.rs

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,11 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
268268
#[cfg_attr(test, assert_instr(vaddsubpd))]
269269
#[stable(feature = "simd_x86", since = "1.27.0")]
270270
pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
271-
addsubpd256(a, b)
271+
let a = a.as_f64x4();
272+
let b = b.as_f64x4();
273+
let add = simd_add(a, b);
274+
let sub = simd_sub(a, b);
275+
simd_shuffle!(add, sub, [4, 1, 6, 3])
272276
}
273277

274278
/// Alternatively adds and subtracts packed single-precision (32-bit)
@@ -280,7 +284,11 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
280284
#[cfg_attr(test, assert_instr(vaddsubps))]
281285
#[stable(feature = "simd_x86", since = "1.27.0")]
282286
pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
283-
addsubps256(a, b)
287+
let a = a.as_f32x8();
288+
let b = b.as_f32x8();
289+
let add = simd_add(a, b);
290+
let sub = simd_sub(a, b);
291+
simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
284292
}
285293

286294
/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
@@ -2906,10 +2914,6 @@ pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
29062914
// LLVM intrinsics used in the above functions
29072915
#[allow(improper_ctypes)]
29082916
extern "C" {
2909-
#[link_name = "llvm.x86.avx.addsub.pd.256"]
2910-
fn addsubpd256(a: __m256d, b: __m256d) -> __m256d;
2911-
#[link_name = "llvm.x86.avx.addsub.ps.256"]
2912-
fn addsubps256(a: __m256, b: __m256) -> __m256;
29132917
#[link_name = "llvm.x86.avx.round.pd.256"]
29142918
fn roundpd256(a: __m256d, b: i32) -> __m256d;
29152919
#[link_name = "llvm.x86.avx.round.ps.256"]

0 commit comments

Comments
 (0)