@@ -4966,22 +4966,18 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
4966
4966
;
4967
4967
; F16C-LABEL: fptosi_2f16_to_4i32:
4968
4968
; F16C: # %bb.0:
4969
- ; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
4970
- ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
4971
- ; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
4969
+ ; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
4970
+ ; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
4972
4971
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
4973
- ; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4974
4972
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
4975
4973
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
4976
4974
; F16C-NEXT: retq
4977
4975
;
4978
4976
; AVX512-LABEL: fptosi_2f16_to_4i32:
4979
4977
; AVX512: # %bb.0:
4980
- ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
4981
- ; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
4982
- ; AVX512-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
4978
+ ; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
4979
+ ; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
4983
4980
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
4984
- ; AVX512-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
4985
4981
; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
4986
4982
; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
4987
4983
; AVX512-NEXT: retq
@@ -5084,11 +5080,9 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
5084
5080
;
5085
5081
; F16C-LABEL: fptoui_2f16_to_4i32:
5086
5082
; F16C: # %bb.0:
5087
- ; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
5088
- ; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
5089
- ; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5083
+ ; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
5084
+ ; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
5090
5085
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
5091
- ; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5092
5086
; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
5093
5087
; F16C-NEXT: vpsrad $31, %xmm1, %xmm2
5094
5088
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
@@ -5100,23 +5094,19 @@ define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
5100
5094
;
5101
5095
; AVX512F-LABEL: fptoui_2f16_to_4i32:
5102
5096
; AVX512F: # %bb.0:
5103
- ; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
5104
- ; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
5105
- ; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5097
+ ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
5098
+ ; AVX512F-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
5106
5099
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5107
- ; AVX512F-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5108
5100
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
5109
5101
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5110
5102
; AVX512F-NEXT: vzeroupper
5111
5103
; AVX512F-NEXT: retq
5112
5104
;
5113
5105
; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32:
5114
5106
; AVX512-FASTLANE: # %bb.0:
5115
- ; AVX512-FASTLANE-NEXT: vpsrld $16, %xmm0, %xmm1
5116
- ; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm1, %xmm1
5117
- ; AVX512-FASTLANE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5107
+ ; AVX512-FASTLANE-NEXT: vxorps %xmm1, %xmm1, %xmm1
5108
+ ; AVX512-FASTLANE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
5118
5109
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
5119
- ; AVX512-FASTLANE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5120
5110
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
5121
5111
; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5122
5112
; AVX512-FASTLANE-NEXT: retq
0 commit comments