@@ -136,7 +136,11 @@ pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
136
136
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
137
137
pub unsafe fn _mm_blend_pd < const IMM2 : i32 > ( a : __m128d , b : __m128d ) -> __m128d {
// NOTE(review): this span is a diff hunk, so both the removed (`-`) and the added (`+`)
// version of the body appear below; the bare numbers are the diff view's old/new line
// numbers, not code.
138
138
// Presumably a compile-time check that IMM2 fits in 2 unsigned bits (one selector bit per
// f64 lane) -- the macro is defined outside this view, confirm there.
static_assert_uimm_bits ! ( IMM2 , 2 ) ;
139
// Removed body: forwarded to the `blendpd` extern binding (link name
// `llvm.x86.sse41.blendpd`), passing the immediate cast to `u8`.
- blendpd ( a, b, IMM2 as u8 )
139
// Added body: expresses the blend as a two-input shuffle whose indices are computed from
// IMM2, then transmutes the `f64x2` shuffle result to the return type.
+ transmute :: < f64x2 , _ > ( simd_shuffle ! (
140
+ a. as_f64x2( ) ,
141
+ b. as_f64x2( ) ,
142
// Output lane 0 uses index 0 when bit 0 of IMM2 is clear and index 2 when it is set;
// output lane 1 uses index 1 or 3 according to bit 1. Presumably indices 0..=1 name the
// lanes of `a` and 2..=3 the lanes of `b` -- confirm against `simd_shuffle!`.
+ [ [ 0 , 2 ] [ IMM2 as usize & 1 ] , [ 1 , 3 ] [ ( IMM2 >> 1 ) as usize & 1 ] ]
143
+ ) )
140
144
}
141
145
142
146
/// Blend packed single-precision (32-bit) floating-point elements from `a`
@@ -150,7 +154,16 @@ pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
150
154
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
151
155
pub unsafe fn _mm_blend_ps < const IMM4 : i32 > ( a : __m128 , b : __m128 ) -> __m128 {
// NOTE(review): this span is a diff hunk, so both the removed (`-`) and the added (`+`)
// version of the body appear below; the bare numbers are the diff view's old/new line
// numbers, not code.
152
156
// Presumably a compile-time check that IMM4 fits in 4 unsigned bits (one selector bit per
// f32 lane) -- the macro is defined outside this view, confirm there.
static_assert_uimm_bits ! ( IMM4 , 4 ) ;
153
// Removed body: forwarded to the `blendps` extern binding (link name
// `llvm.x86.sse41.blendps`), passing the immediate cast to `u8`.
- blendps ( a, b, IMM4 as u8 )
157
// Added body: expresses the blend as a two-input shuffle whose indices are computed from
// IMM4, then transmutes the `f32x4` shuffle result to the return type.
+ transmute :: < f32x4 , _ > ( simd_shuffle ! (
158
+ a. as_f32x4( ) ,
159
+ b. as_f32x4( ) ,
160
// One index per output lane i (0..=3): bit i of IMM4 picks between index i (bit clear)
// and index i + 4 (bit set). Presumably indices 0..=3 name the lanes of `a` and 4..=7 the
// lanes of `b` -- confirm against `simd_shuffle!`.
+ [
161
+ [ 0 , 4 ] [ IMM4 as usize & 1 ] ,
162
+ [ 1 , 5 ] [ ( IMM4 >> 1 ) as usize & 1 ] ,
163
+ [ 2 , 6 ] [ ( IMM4 >> 2 ) as usize & 1 ] ,
164
+ [ 3 , 7 ] [ ( IMM4 >> 3 ) as usize & 1 ] ,
165
+ ]
166
+ ) )
154
167
}
155
168
156
169
/// Extracts a single-precision (32-bit) floating-point element from `a`,
@@ -1139,10 +1152,6 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
1139
1152
1140
1153
#[ allow( improper_ctypes) ]
1141
1154
extern "C" {
1142
- #[ link_name = "llvm.x86.sse41.blendpd" ]
1143
- fn blendpd ( a : __m128d , b : __m128d , imm2 : u8 ) -> __m128d ;
1144
- #[ link_name = "llvm.x86.sse41.blendps" ]
1145
- fn blendps ( a : __m128 , b : __m128 , imm4 : u8 ) -> __m128 ;
1146
1155
#[ link_name = "llvm.x86.sse41.insertps" ]
1147
1156
fn insertps ( a : __m128 , b : __m128 , imm8 : u8 ) -> __m128 ;
1148
1157
#[ link_name = "llvm.x86.sse41.packusdw" ]
0 commit comments