@@ -268,7 +268,11 @@ pub unsafe fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vaddsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
-    addsubpd256(a, b)
+    let a = a.as_f64x4();
+    let b = b.as_f64x4();
+    let add = simd_add(a, b);
+    let sub = simd_sub(a, b);
+    simd_shuffle!(add, sub, [4, 1, 6, 3])
 }

 /// Alternatively adds and subtracts packed single-precision (32-bit)
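For reference, `simd_shuffle!` indexes into the concatenation of its two inputs, so with four-lane vectors indices 0-3 select lanes of `add` and 4-7 select lanes of `sub`; the mask `[4, 1, 6, 3]` therefore subtracts in the even lanes and adds in the odd lanes, which is exactly what `vaddsubpd` does. A minimal scalar sketch of that lane pattern (the helper name is illustrative, not part of the patch):

fn addsub_pd_model(a: [f64; 4], b: [f64; 4]) -> [f64; 4] {
    // Even lanes subtract, odd lanes add, matching vaddsubpd.
    [a[0] - b[0], a[1] + b[1], a[2] - b[2], a[3] + b[3]]
}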
@@ -280,7 +284,11 @@ pub unsafe fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vaddsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
-    addsubps256(a, b)
+    let a = a.as_f32x8();
+    let b = b.as_f32x8();
+    let add = simd_add(a, b);
+    let sub = simd_sub(a, b);
+    simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
 }

 /// Subtracts packed double-precision (64-bit) floating-point elements in `b`
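The single-precision version follows the same scheme over eight lanes: indices 8, 10, 12, 14 pull the subtracted even lanes out of `sub`, while 1, 3, 5, 7 keep the added odd lanes from `add`. A scalar sketch of the resulting lane pattern (illustrative helper; assumes Rust 1.63+ for `core::array::from_fn`):

fn addsub_ps_model(a: [f32; 8], b: [f32; 8]) -> [f32; 8] {
    // Lane i is a[i] - b[i] when i is even, a[i] + b[i] when odd.
    core::array::from_fn(|i| if i % 2 == 0 { a[i] - b[i] } else { a[i] + b[i] })
}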
@@ -2906,10 +2914,6 @@ pub unsafe fn _mm256_cvtss_f32(a: __m256) -> f32 {
 // LLVM intrinsics used in the above functions
 #[allow(improper_ctypes)]
 extern "C" {
-    #[link_name = "llvm.x86.avx.addsub.pd.256"]
-    fn addsubpd256(a: __m256d, b: __m256d) -> __m256d;
-    #[link_name = "llvm.x86.avx.addsub.ps.256"]
-    fn addsubps256(a: __m256, b: __m256) -> __m256;
     #[link_name = "llvm.x86.avx.round.pd.256"]
     fn roundpd256(a: __m256d, b: i32) -> __m256d;
     #[link_name = "llvm.x86.avx.round.ps.256"]