Skip to content

Commit 594f74b

Browse files
committed
Refactor avx512f: sqrt + rounding fix
1 parent 19d7ac5 commit 594f74b

File tree

1 file changed

+28
-94
lines changed

1 file changed

+28
-94
lines changed

crates/core_arch/src/x86/avx512f.rs

Lines changed: 28 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -3001,7 +3001,7 @@ pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128
30013001
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30023002
#[cfg_attr(test, assert_instr(vsqrtps))]
30033003
pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3004-
transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
3004+
simd_fsqrt(a)
30053005
}
30063006

30073007
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3012,8 +3012,7 @@ pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
30123012
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30133013
#[cfg_attr(test, assert_instr(vsqrtps))]
30143014
pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
3015-
let sqrt = _mm512_sqrt_ps(a).as_f32x16();
3016-
transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
3015+
simd_select_bitmask(k, simd_fsqrt(a), src)
30173016
}
30183017

30193018
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3024,9 +3023,7 @@ pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m51
30243023
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30253024
#[cfg_attr(test, assert_instr(vsqrtps))]
30263025
pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
3027-
let sqrt = _mm512_sqrt_ps(a).as_f32x16();
3028-
let zero = _mm512_setzero_ps().as_f32x16();
3029-
transmute(simd_select_bitmask(k, sqrt, zero))
3026+
simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps())
30303027
}
30313028

30323029
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3037,8 +3034,7 @@ pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
30373034
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30383035
#[cfg_attr(test, assert_instr(vsqrtps))]
30393036
pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
3040-
let sqrt = _mm256_sqrt_ps(a).as_f32x8();
3041-
transmute(simd_select_bitmask(k, sqrt, src.as_f32x8()))
3037+
simd_select_bitmask(k, simd_fsqrt(a), src)
30423038
}
30433039

30443040
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3049,9 +3045,7 @@ pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256
30493045
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30503046
#[cfg_attr(test, assert_instr(vsqrtps))]
30513047
pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
3052-
let sqrt = _mm256_sqrt_ps(a).as_f32x8();
3053-
let zero = _mm256_setzero_ps().as_f32x8();
3054-
transmute(simd_select_bitmask(k, sqrt, zero))
3048+
simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps())
30553049
}
30563050

30573051
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3062,8 +3056,7 @@ pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
30623056
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30633057
#[cfg_attr(test, assert_instr(vsqrtps))]
30643058
pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
3065-
let sqrt = _mm_sqrt_ps(a).as_f32x4();
3066-
transmute(simd_select_bitmask(k, sqrt, src.as_f32x4()))
3059+
simd_select_bitmask(k, simd_fsqrt(a), src)
30673060
}
30683061

30693062
/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3074,9 +3067,7 @@ pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
30743067
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30753068
#[cfg_attr(test, assert_instr(vsqrtps))]
30763069
pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
3077-
let sqrt = _mm_sqrt_ps(a).as_f32x4();
3078-
let zero = _mm_setzero_ps().as_f32x4();
3079-
transmute(simd_select_bitmask(k, sqrt, zero))
3070+
simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps())
30803071
}
30813072

30823073
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
@@ -3087,7 +3078,7 @@ pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
30873078
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30883079
#[cfg_attr(test, assert_instr(vsqrtpd))]
30893080
pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3090-
transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
3081+
simd_fsqrt(a)
30913082
}
30923083

30933084
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3098,8 +3089,7 @@ pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
30983089
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30993090
#[cfg_attr(test, assert_instr(vsqrtpd))]
31003091
pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
3101-
let sqrt = _mm512_sqrt_pd(a).as_f64x8();
3102-
transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
3092+
simd_select_bitmask(k, simd_fsqrt(a), src)
31033093
}
31043094

31053095
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3110,9 +3100,7 @@ pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m5
31103100
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31113101
#[cfg_attr(test, assert_instr(vsqrtpd))]
31123102
pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
3113-
let sqrt = _mm512_sqrt_pd(a).as_f64x8();
3114-
let zero = _mm512_setzero_pd().as_f64x8();
3115-
transmute(simd_select_bitmask(k, sqrt, zero))
3103+
simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd())
31163104
}
31173105

31183106
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3123,8 +3111,7 @@ pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
31233111
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31243112
#[cfg_attr(test, assert_instr(vsqrtpd))]
31253113
pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
3126-
let sqrt = _mm256_sqrt_pd(a).as_f64x4();
3127-
transmute(simd_select_bitmask(k, sqrt, src.as_f64x4()))
3114+
simd_select_bitmask(k, simd_fsqrt(a), src)
31283115
}
31293116

31303117
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3135,9 +3122,7 @@ pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m2
31353122
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31363123
#[cfg_attr(test, assert_instr(vsqrtpd))]
31373124
pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
3138-
let sqrt = _mm256_sqrt_pd(a).as_f64x4();
3139-
let zero = _mm256_setzero_pd().as_f64x4();
3140-
transmute(simd_select_bitmask(k, sqrt, zero))
3125+
simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd())
31413126
}
31423127

31433128
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3148,8 +3133,7 @@ pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
31483133
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31493134
#[cfg_attr(test, assert_instr(vsqrtpd))]
31503135
pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
3151-
let sqrt = _mm_sqrt_pd(a).as_f64x2();
3152-
transmute(simd_select_bitmask(k, sqrt, src.as_f64x2()))
3136+
simd_select_bitmask(k, simd_fsqrt(a), src)
31533137
}
31543138

31553139
/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -3160,9 +3144,7 @@ pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d
31603144
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31613145
#[cfg_attr(test, assert_instr(vsqrtpd))]
31623146
pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
3163-
let sqrt = _mm_sqrt_pd(a).as_f64x2();
3164-
let zero = _mm_setzero_pd().as_f64x2();
3165-
transmute(simd_select_bitmask(k, sqrt, zero))
3147+
simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd())
31663148
}
31673149

31683150
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
@@ -34762,13 +34744,7 @@ pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3476234744
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3476334745
#[cfg_attr(test, assert_instr(vsqrtss))]
3476434746
pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
34765-
transmute(vsqrtss(
34766-
a.as_f32x4(),
34767-
b.as_f32x4(),
34768-
src.as_f32x4(),
34769-
k,
34770-
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
34771-
))
34747+
vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
3477234748
}
3477334749

3477434750
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
@@ -34779,13 +34755,7 @@ pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -
3477934755
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3478034756
#[cfg_attr(test, assert_instr(vsqrtss))]
3478134757
pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
34782-
transmute(vsqrtss(
34783-
a.as_f32x4(),
34784-
b.as_f32x4(),
34785-
_mm_setzero_ps().as_f32x4(),
34786-
k,
34787-
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
34788-
))
34758+
vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION)
3478934759
}
3479034760

3479134761
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
@@ -34796,13 +34766,7 @@ pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3479634766
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3479734767
#[cfg_attr(test, assert_instr(vsqrtsd))]
3479834768
pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34799-
transmute(vsqrtsd(
34800-
a.as_f64x2(),
34801-
b.as_f64x2(),
34802-
src.as_f64x2(),
34803-
k,
34804-
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
34805-
))
34769+
vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION)
3480634770
}
3480734771

3480834772
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
@@ -34813,13 +34777,7 @@ pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d
3481334777
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3481434778
#[cfg_attr(test, assert_instr(vsqrtsd))]
3481534779
pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
34816-
transmute(vsqrtsd(
34817-
a.as_f64x2(),
34818-
b.as_f64x2(),
34819-
_mm_setzero_pd().as_f64x2(),
34820-
k,
34821-
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
34822-
))
34780+
vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION)
3482334781
}
3482434782

3482534783
/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
@@ -36907,11 +36865,7 @@ pub unsafe fn _mm_maskz_min_round_sd<const SAE: i32>(
3690736865
#[rustc_legacy_const_generics(2)]
3690836866
pub unsafe fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
3690936867
static_assert_rounding!(ROUNDING);
36910-
let a = a.as_f32x4();
36911-
let b = b.as_f32x4();
36912-
let zero = _mm_setzero_ps().as_f32x4();
36913-
let r = vsqrtss(a, b, zero, 0b1, ROUNDING);
36914-
transmute(r)
36868+
vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
3691536869
}
3691636870

3691736871
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
@@ -36936,11 +36890,7 @@ pub unsafe fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
3693636890
b: __m128,
3693736891
) -> __m128 {
3693836892
static_assert_rounding!(ROUNDING);
36939-
let a = a.as_f32x4();
36940-
let b = b.as_f32x4();
36941-
let src = src.as_f32x4();
36942-
let r = vsqrtss(a, b, src, k, ROUNDING);
36943-
transmute(r)
36893+
vsqrtss(a, b, src, k, ROUNDING)
3694436894
}
3694536895

3694636896
/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
@@ -36964,11 +36914,7 @@ pub unsafe fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(
3696436914
b: __m128,
3696536915
) -> __m128 {
3696636916
static_assert_rounding!(ROUNDING);
36967-
let a = a.as_f32x4();
36968-
let b = b.as_f32x4();
36969-
let zero = _mm_setzero_ps().as_f32x4();
36970-
let r = vsqrtss(a, b, zero, k, ROUNDING);
36971-
transmute(r)
36917+
vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
3697236918
}
3697336919

3697436920
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
@@ -36988,11 +36934,7 @@ pub unsafe fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(
3698836934
#[rustc_legacy_const_generics(2)]
3698936935
pub unsafe fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
3699036936
static_assert_rounding!(ROUNDING);
36991-
let a = a.as_f64x2();
36992-
let b = b.as_f64x2();
36993-
let zero = _mm_setzero_pd().as_f64x2();
36994-
let r = vsqrtsd(a, b, zero, 0b1, ROUNDING);
36995-
transmute(r)
36937+
vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
3699636938
}
3699736939

3699836940
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
@@ -37017,11 +36959,7 @@ pub unsafe fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
3701736959
b: __m128d,
3701836960
) -> __m128d {
3701936961
static_assert_rounding!(ROUNDING);
37020-
let a = a.as_f64x2();
37021-
let b = b.as_f64x2();
37022-
let src = src.as_f64x2();
37023-
let r = vsqrtsd(a, b, src, k, ROUNDING);
37024-
transmute(r)
36962+
vsqrtsd(a, b, src, k, ROUNDING)
3702536963
}
3702636964

3702736965
/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
@@ -37045,11 +36983,7 @@ pub unsafe fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
3704536983
b: __m128d,
3704636984
) -> __m128d {
3704736985
static_assert_rounding!(ROUNDING);
37048-
let a = a.as_f64x2();
37049-
let b = b.as_f64x2();
37050-
let zero = _mm_setzero_pd().as_f64x2();
37051-
let r = vsqrtsd(a, b, zero, k, ROUNDING);
37052-
transmute(r)
36986+
vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
3705336987
}
3705436988

3705536989
/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
@@ -39062,7 +38996,7 @@ pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d)
3906238996
b.as_f64x2(),
3906338997
src.as_f32x4(),
3906438998
k,
39065-
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
38999+
_MM_FROUND_CUR_DIRECTION,
3906639000
))
3906739001
}
3906839002

@@ -39079,7 +39013,7 @@ pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
3907939013
b.as_f64x2(),
3908039014
_mm_setzero_ps().as_f32x4(),
3908139015
k,
39082-
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
39016+
_MM_FROUND_CUR_DIRECTION,
3908339017
))
3908439018
}
3908539019

@@ -41045,9 +40979,9 @@ extern "C" {
4104540979
#[link_name = "llvm.x86.avx512.mask.min.sd.round"]
4104640980
fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
4104740981
#[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
41048-
fn vsqrtss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
40982+
fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
4104940983
#[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
41050-
fn vsqrtsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
40984+
fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
4105140985
#[link_name = "llvm.x86.avx512.mask.getexp.ss"]
4105240986
fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
4105340987
#[link_name = "llvm.x86.avx512.mask.getexp.sd"]

0 commit comments

Comments (0)