Skip to content

Commit fe17c5f

Browse files
committed
SelectionDAG: Improve expandFP_TO_INT_SAT
Currently, expandFP_TO_INT_SAT uses FMAXNUM and FMINNUM, which is not correct if Src is an sNaN. Try all three flavors of min/max instead: 1) FMAXIMUMNUM/FMINIMUMNUM: see test/CodeGen/RISCV/bfloat-convert.ll; 2) FMAXNUM/FMINNUM: see test/CodeGen/Mips/Half2Int16.ll; 3) FMAXIMUM/FMINIMUM: see test/CodeGen/WebAssembly/Half2Int16.ll.
1 parent 4776f5f commit fe17c5f

File tree

3 files changed

+297
-8
lines changed

3 files changed

+297
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11542,22 +11542,47 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
1154211542
// If the integer bounds are exactly representable as floats and min/max are
1154311543
// legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
1154411544
// of comparisons and selects.
11545-
bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
11545+
bool MinMax2019NumLegal = isOperationLegal(ISD::FMINIMUMNUM, SrcVT) &&
11546+
isOperationLegal(ISD::FMAXIMUMNUM, SrcVT);
11547+
bool MinMax2019Legal = isOperationLegal(ISD::FMINIMUM, SrcVT) &&
11548+
isOperationLegal(ISD::FMAXIMUM, SrcVT);
11549+
bool MinMax2008Legal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
1154611550
isOperationLegal(ISD::FMAXNUM, SrcVT);
11547-
if (AreExactFloatBounds && MinMaxLegal) {
11548-
SDValue Clamped = Src;
1154911551

11550-
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11551-
Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11552-
// Clamp by MaxFloat from above. NaN cannot occur.
11553-
Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11552+
if (AreExactFloatBounds && (MinMax2019NumLegal || MinMax2019Legal || MinMax2008Legal)) {
11553+
SDValue Clamped = Src;
11554+
bool Use2019 = false;
11555+
11556+
if (MinMax2019NumLegal) {
11557+
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11558+
Clamped = DAG.getNode(ISD::FMAXIMUMNUM, dl, SrcVT, Clamped, MinFloatNode);
11559+
// Clamp by MaxFloat from above. NaN cannot occur.
11560+
Clamped = DAG.getNode(ISD::FMINIMUMNUM, dl, SrcVT, Clamped, MaxFloatNode);
11561+
} else if (MinMax2008Legal) {
11562+
// Prefer the IEEE 754-2008 flavor (FMAXNUM/FMINNUM) over FMAXIMUM/FMINIMUM, as it has better performance when converting sNaN to unsigned.
11563+
if (!IsSigned && !DAG.isKnownNeverSNaN(Clamped)) {
11564+
Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, Clamped);
11565+
}
11566+
// Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
11567+
Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
11568+
// Clamp by MaxFloat from above. NaN cannot occur.
11569+
Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
11570+
} else if (MinMax2019Legal) {
11571+
// Clamp Src by MinFloat from below. If Src is NaN the result is qNaN.
11572+
Clamped = DAG.getNode(ISD::FMAXIMUM, dl, SrcVT, Clamped, MinFloatNode);
11573+
// Clamp by MaxFloat from above. NaN may occur.
11574+
Clamped = DAG.getNode(ISD::FMINIMUM, dl, SrcVT, Clamped, MaxFloatNode);
11575+
Use2019 = true;
11576+
} else {
11577+
llvm_unreachable("No Min/Max supported?");
11578+
}
1155411579
// Convert clamped value to integer.
1155511580
SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
1155611581
dl, DstVT, Clamped);
1155711582

1155811583
// In the unsigned case we're done, because we mapped NaN to MinFloat,
1155911584
// which will cast to zero.
11560-
if (!IsSigned)
11585+
if ((!IsSigned && !Use2019) || DAG.isKnownNeverNaN(Src))
1156111586
return FpToInt;
1156211587

1156311588
// Otherwise, select 0 if Src is NaN.

llvm/test/CodeGen/Mips/Half2Int16.ll

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=mipsisa32r6 -mattr=-soft-float | FileCheck %s
3+
4+
define i16 @fcvt_h_s_sat(float %a) {
5+
; CHECK-LABEL: fcvt_h_s_sat:
6+
; CHECK: # %bb.0: # %start
7+
; CHECK-NEXT: lui $1, %hi($CPI0_0)
8+
; CHECK-NEXT: lwc1 $f0, %lo($CPI0_0)($1)
9+
; CHECK-NEXT: max.s $f0, $f12, $f0
10+
; CHECK-NEXT: lui $1, %hi($CPI0_1)
11+
; CHECK-NEXT: lwc1 $f1, %lo($CPI0_1)($1)
12+
; CHECK-NEXT: min.s $f0, $f0, $f1
13+
; CHECK-NEXT: trunc.w.s $f0, $f0
14+
; CHECK-NEXT: mfc1 $1, $f0
15+
; CHECK-NEXT: cmp.un.s $f0, $f12, $f12
16+
; CHECK-NEXT: mfc1 $2, $f0
17+
; CHECK-NEXT: jr $ra
18+
; CHECK-NEXT: seleqz $2, $1, $2
19+
start:
20+
%0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
21+
ret i16 %0
22+
}
23+
24+
define i16 @fcvt_hu_s_sat(float %a) {
25+
; CHECK-LABEL: fcvt_hu_s_sat:
26+
; CHECK: # %bb.0: # %start
27+
; CHECK-NEXT: max.s $f0, $f12, $f12
28+
; CHECK-NEXT: mtc1 $zero, $f1
29+
; CHECK-NEXT: max.s $f0, $f0, $f1
30+
; CHECK-NEXT: lui $1, %hi($CPI1_0)
31+
; CHECK-NEXT: lwc1 $f1, %lo($CPI1_0)($1)
32+
; CHECK-NEXT: min.s $f0, $f0, $f1
33+
; CHECK-NEXT: lui $1, %hi($CPI1_1)
34+
; CHECK-NEXT: lwc1 $f1, %lo($CPI1_1)($1)
35+
; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1
36+
; CHECK-NEXT: trunc.w.s $f3, $f0
37+
; CHECK-NEXT: mfc1 $1, $f3
38+
; CHECK-NEXT: mfc1 $2, $f2
39+
; CHECK-NEXT: selnez $1, $1, $2
40+
; CHECK-NEXT: sub.s $f0, $f0, $f1
41+
; CHECK-NEXT: trunc.w.s $f0, $f0
42+
; CHECK-NEXT: mfc1 $3, $f0
43+
; CHECK-NEXT: lui $4, 32768
44+
; CHECK-NEXT: xor $3, $3, $4
45+
; CHECK-NEXT: seleqz $2, $3, $2
46+
; CHECK-NEXT: jr $ra
47+
; CHECK-NEXT: or $2, $1, $2
48+
start:
49+
%0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
50+
ret i16 %0
51+
}
52+
53+
define i16 @fcvt_h_s_sat_nnan(float nofpclass(nan) %a) {
54+
; CHECK-LABEL: fcvt_h_s_sat_nnan:
55+
; CHECK: # %bb.0: # %start
56+
; CHECK-NEXT: lui $1, %hi($CPI2_0)
57+
; CHECK-NEXT: lwc1 $f0, %lo($CPI2_0)($1)
58+
; CHECK-NEXT: max.s $f0, $f12, $f0
59+
; CHECK-NEXT: lui $1, %hi($CPI2_1)
60+
; CHECK-NEXT: lwc1 $f1, %lo($CPI2_1)($1)
61+
; CHECK-NEXT: min.s $f0, $f0, $f1
62+
; CHECK-NEXT: trunc.w.s $f0, $f0
63+
; CHECK-NEXT: jr $ra
64+
; CHECK-NEXT: mfc1 $2, $f0
65+
start:
66+
%0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
67+
ret i16 %0
68+
}
69+
70+
define i16 @fcvt_hu_s_sat_nnan(float nofpclass(nan) %a) {
71+
; CHECK-LABEL: fcvt_hu_s_sat_nnan:
72+
; CHECK: # %bb.0: # %start
73+
; CHECK-NEXT: mtc1 $zero, $f0
74+
; CHECK-NEXT: max.s $f0, $f12, $f0
75+
; CHECK-NEXT: lui $1, %hi($CPI3_0)
76+
; CHECK-NEXT: lwc1 $f1, %lo($CPI3_0)($1)
77+
; CHECK-NEXT: min.s $f0, $f0, $f1
78+
; CHECK-NEXT: lui $1, %hi($CPI3_1)
79+
; CHECK-NEXT: lwc1 $f1, %lo($CPI3_1)($1)
80+
; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1
81+
; CHECK-NEXT: trunc.w.s $f3, $f0
82+
; CHECK-NEXT: mfc1 $1, $f3
83+
; CHECK-NEXT: mfc1 $2, $f2
84+
; CHECK-NEXT: selnez $1, $1, $2
85+
; CHECK-NEXT: sub.s $f0, $f0, $f1
86+
; CHECK-NEXT: trunc.w.s $f0, $f0
87+
; CHECK-NEXT: mfc1 $3, $f0
88+
; CHECK-NEXT: lui $4, 32768
89+
; CHECK-NEXT: xor $3, $3, $4
90+
; CHECK-NEXT: seleqz $2, $3, $2
91+
; CHECK-NEXT: jr $ra
92+
; CHECK-NEXT: or $2, $1, $2
93+
start:
94+
%0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
95+
ret i16 %0
96+
}
97+
98+
define i16 @fcvt_h_s_sat_nsnan(float nofpclass(snan) %a) {
99+
; CHECK-LABEL: fcvt_h_s_sat_nsnan:
100+
; CHECK: # %bb.0: # %start
101+
; CHECK-NEXT: lui $1, %hi($CPI4_0)
102+
; CHECK-NEXT: lwc1 $f0, %lo($CPI4_0)($1)
103+
; CHECK-NEXT: max.s $f0, $f12, $f0
104+
; CHECK-NEXT: lui $1, %hi($CPI4_1)
105+
; CHECK-NEXT: lwc1 $f1, %lo($CPI4_1)($1)
106+
; CHECK-NEXT: min.s $f0, $f0, $f1
107+
; CHECK-NEXT: trunc.w.s $f0, $f0
108+
; CHECK-NEXT: mfc1 $1, $f0
109+
; CHECK-NEXT: cmp.un.s $f0, $f12, $f12
110+
; CHECK-NEXT: mfc1 $2, $f0
111+
; CHECK-NEXT: jr $ra
112+
; CHECK-NEXT: seleqz $2, $1, $2
113+
start:
114+
%0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
115+
ret i16 %0
116+
}
117+
118+
define i16 @fcvt_hu_s_sat_nsnan(float nofpclass(snan) %a) {
119+
; CHECK-LABEL: fcvt_hu_s_sat_nsnan:
120+
; CHECK: # %bb.0: # %start
121+
; CHECK-NEXT: mtc1 $zero, $f0
122+
; CHECK-NEXT: max.s $f0, $f12, $f0
123+
; CHECK-NEXT: lui $1, %hi($CPI5_0)
124+
; CHECK-NEXT: lwc1 $f1, %lo($CPI5_0)($1)
125+
; CHECK-NEXT: min.s $f0, $f0, $f1
126+
; CHECK-NEXT: lui $1, %hi($CPI5_1)
127+
; CHECK-NEXT: lwc1 $f1, %lo($CPI5_1)($1)
128+
; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1
129+
; CHECK-NEXT: trunc.w.s $f3, $f0
130+
; CHECK-NEXT: mfc1 $1, $f3
131+
; CHECK-NEXT: mfc1 $2, $f2
132+
; CHECK-NEXT: selnez $1, $1, $2
133+
; CHECK-NEXT: sub.s $f0, $f0, $f1
134+
; CHECK-NEXT: trunc.w.s $f0, $f0
135+
; CHECK-NEXT: mfc1 $3, $f0
136+
; CHECK-NEXT: lui $4, 32768
137+
; CHECK-NEXT: xor $3, $3, $4
138+
; CHECK-NEXT: seleqz $2, $3, $2
139+
; CHECK-NEXT: jr $ra
140+
; CHECK-NEXT: or $2, $1, $2
141+
start:
142+
%0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
143+
ret i16 %0
144+
}
145+
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=wasm32 | FileCheck %s
3+
4+
define i16 @fcvt_h_s_sat(float %a) {
5+
; CHECK-LABEL: fcvt_h_s_sat:
6+
; CHECK: .functype fcvt_h_s_sat (f32) -> (i32)
7+
; CHECK-NEXT: # %bb.0: # %start
8+
; CHECK-NEXT: i32.const 0
9+
; CHECK-NEXT: local.get 0
10+
; CHECK-NEXT: f32.const -0x1p15
11+
; CHECK-NEXT: f32.max
12+
; CHECK-NEXT: f32.const 0x1.fffcp14
13+
; CHECK-NEXT: f32.min
14+
; CHECK-NEXT: i32.trunc_sat_f32_s
15+
; CHECK-NEXT: local.get 0
16+
; CHECK-NEXT: local.get 0
17+
; CHECK-NEXT: f32.ne
18+
; CHECK-NEXT: i32.select
19+
; CHECK-NEXT: # fallthrough-return
20+
start:
21+
%0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
22+
ret i16 %0
23+
}
24+
25+
define i16 @fcvt_hu_s_sat(float %a) {
26+
; CHECK-LABEL: fcvt_hu_s_sat:
27+
; CHECK: .functype fcvt_hu_s_sat (f32) -> (i32)
28+
; CHECK-NEXT: # %bb.0: # %start
29+
; CHECK-NEXT: i32.const 0
30+
; CHECK-NEXT: local.get 0
31+
; CHECK-NEXT: f32.const 0x0p0
32+
; CHECK-NEXT: f32.max
33+
; CHECK-NEXT: f32.const 0x1.fffep15
34+
; CHECK-NEXT: f32.min
35+
; CHECK-NEXT: i32.trunc_sat_f32_u
36+
; CHECK-NEXT: local.get 0
37+
; CHECK-NEXT: local.get 0
38+
; CHECK-NEXT: f32.ne
39+
; CHECK-NEXT: i32.select
40+
; CHECK-NEXT: # fallthrough-return
41+
start:
42+
%0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
43+
ret i16 %0
44+
}
45+
46+
define i16 @fcvt_h_s_sat_nnan(float nofpclass(nan) %a) {
47+
; CHECK-LABEL: fcvt_h_s_sat_nnan:
48+
; CHECK: .functype fcvt_h_s_sat_nnan (f32) -> (i32)
49+
; CHECK-NEXT: # %bb.0: # %start
50+
; CHECK-NEXT: local.get 0
51+
; CHECK-NEXT: f32.const -0x1p15
52+
; CHECK-NEXT: f32.max
53+
; CHECK-NEXT: f32.const 0x1.fffcp14
54+
; CHECK-NEXT: f32.min
55+
; CHECK-NEXT: i32.trunc_sat_f32_s
56+
; CHECK-NEXT: # fallthrough-return
57+
start:
58+
%0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
59+
ret i16 %0
60+
}
61+
62+
define i16 @fcvt_hu_s_sat_nnan(float nofpclass(nan) %a) {
63+
; CHECK-LABEL: fcvt_hu_s_sat_nnan:
64+
; CHECK: .functype fcvt_hu_s_sat_nnan (f32) -> (i32)
65+
; CHECK-NEXT: # %bb.0: # %start
66+
; CHECK-NEXT: local.get 0
67+
; CHECK-NEXT: f32.const 0x0p0
68+
; CHECK-NEXT: f32.max
69+
; CHECK-NEXT: f32.const 0x1.fffep15
70+
; CHECK-NEXT: f32.min
71+
; CHECK-NEXT: i32.trunc_sat_f32_u
72+
; CHECK-NEXT: # fallthrough-return
73+
start:
74+
%0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
75+
ret i16 %0
76+
}
77+
78+
define i16 @fcvt_h_s_sat_nsnan(float nofpclass(snan) %a) {
79+
; CHECK-LABEL: fcvt_h_s_sat_nsnan:
80+
; CHECK: .functype fcvt_h_s_sat_nsnan (f32) -> (i32)
81+
; CHECK-NEXT: # %bb.0: # %start
82+
; CHECK-NEXT: i32.const 0
83+
; CHECK-NEXT: local.get 0
84+
; CHECK-NEXT: f32.const -0x1p15
85+
; CHECK-NEXT: f32.max
86+
; CHECK-NEXT: f32.const 0x1.fffcp14
87+
; CHECK-NEXT: f32.min
88+
; CHECK-NEXT: i32.trunc_sat_f32_s
89+
; CHECK-NEXT: local.get 0
90+
; CHECK-NEXT: local.get 0
91+
; CHECK-NEXT: f32.ne
92+
; CHECK-NEXT: i32.select
93+
; CHECK-NEXT: # fallthrough-return
94+
start:
95+
%0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a)
96+
ret i16 %0
97+
}
98+
99+
define i16 @fcvt_hu_s_sat_nsnan(float nofpclass(snan) %a) {
100+
; CHECK-LABEL: fcvt_hu_s_sat_nsnan:
101+
; CHECK: .functype fcvt_hu_s_sat_nsnan (f32) -> (i32)
102+
; CHECK-NEXT: # %bb.0: # %start
103+
; CHECK-NEXT: i32.const 0
104+
; CHECK-NEXT: local.get 0
105+
; CHECK-NEXT: f32.const 0x0p0
106+
; CHECK-NEXT: f32.max
107+
; CHECK-NEXT: f32.const 0x1.fffep15
108+
; CHECK-NEXT: f32.min
109+
; CHECK-NEXT: i32.trunc_sat_f32_u
110+
; CHECK-NEXT: local.get 0
111+
; CHECK-NEXT: local.get 0
112+
; CHECK-NEXT: f32.ne
113+
; CHECK-NEXT: i32.select
114+
; CHECK-NEXT: # fallthrough-return
115+
start:
116+
%0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a)
117+
ret i16 %0
118+
}
119+

0 commit comments

Comments
 (0)