Skip to content

Commit d97b546

Browse files
LU-JOHN frederik-h
authored and committed
Reland "DAG: Preserve range metadata when load is narrowed" (llvm#128144) (llvm#130609)
Changes: Add guard to ensure truncation is strictly smaller than original size. --------- Signed-off-by: John Lu <John.Lu@amd.com>
1 parent 37c53e7 commit d97b546

File tree

3 files changed

+112
-14
lines changed

3 files changed

+112
-14
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14957,12 +14957,37 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
1495714957
AddToWorklist(NewPtr.getNode());
1495814958

1495914959
SDValue Load;
14960-
if (ExtType == ISD::NON_EXTLOAD)
14961-
Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14962-
LN0->getPointerInfo().getWithOffset(PtrOff),
14963-
LN0->getOriginalAlign(),
14964-
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14965-
else
14960+
if (ExtType == ISD::NON_EXTLOAD) {
14961+
const MDNode *OldRanges = LN0->getRanges();
14962+
const MDNode *NewRanges = nullptr;
14963+
// If LSBs are loaded and the truncated ConstantRange for the OldRanges
14964+
// metadata is not the full-set for the new width, then create a NewRanges
14965+
// metadata for the truncated load.
14966+
if (ShAmt == 0 && OldRanges) {
14967+
ConstantRange CR = getConstantRangeFromMetadata(*OldRanges);
14968+
unsigned BitSize = VT.getScalarSizeInBits();
14969+
14970+
// It is possible for an 8-bit extending load with 8-bit range
14971+
// metadata to be narrowed to an 8-bit load. This guard is necessary to
14972+
// ensure that the truncated width is strictly smaller than the original.
14973+
if (CR.getBitWidth() > BitSize) {
14974+
ConstantRange TruncatedCR = CR.truncate(BitSize);
14975+
if (!TruncatedCR.isFullSet()) {
14976+
Metadata *Bounds[2] = {
14977+
ConstantAsMetadata::get(
14978+
ConstantInt::get(*DAG.getContext(), TruncatedCR.getLower())),
14979+
ConstantAsMetadata::get(
14980+
ConstantInt::get(*DAG.getContext(), TruncatedCR.getUpper()))};
14981+
NewRanges = MDNode::get(*DAG.getContext(), Bounds);
14982+
}
14983+
} else if (CR.getBitWidth() == BitSize)
14984+
NewRanges = OldRanges;
14985+
}
14986+
Load = DAG.getLoad(
14987+
VT, DL, LN0->getChain(), NewPtr,
14988+
LN0->getPointerInfo().getWithOffset(PtrOff), LN0->getOriginalAlign(),
14989+
LN0->getMemOperand()->getFlags(), LN0->getAAInfo(), NewRanges);
14990+
} else
1496614991
Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
1496714992
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
1496814993
LN0->getOriginalAlign(),

llvm/test/CodeGen/AMDGPU/shl64_reduce.ll

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,66 @@
1313
; Test range with metadata
1414
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1515

16-
; FIXME: This case should be reduced, but SelectionDAG::computeKnownBits() cannot
17-
; determine the minimum from metadata in this case. Match current results
18-
; for now.
19-
2016
define i64 @shl_metadata(i64 %arg0, ptr %arg1.ptr) {
2117
; CHECK-LABEL: shl_metadata:
2218
; CHECK: ; %bb.0:
2319
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20+
; CHECK-NEXT: flat_load_dword v1, v[2:3]
21+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
22+
; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
23+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
24+
; CHECK-NEXT: s_setpc_b64 s[30:31]
25+
%shift.amt = load i64, ptr %arg1.ptr, !range !0, !noundef !{}
26+
%shl = shl i64 %arg0, %shift.amt
27+
ret i64 %shl
28+
}
29+
30+
define i64 @shl_metadata_two_ranges(i64 %arg0, ptr %arg1.ptr) {
31+
; CHECK-LABEL: shl_metadata_two_ranges:
32+
; CHECK: ; %bb.0:
33+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34+
; CHECK-NEXT: flat_load_dword v1, v[2:3]
35+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
36+
; CHECK-NEXT: v_lshlrev_b32_e32 v1, v1, v0
37+
; CHECK-NEXT: v_mov_b32_e32 v0, 0
38+
; CHECK-NEXT: s_setpc_b64 s[30:31]
39+
%shift.amt = load i64, ptr %arg1.ptr, !range !1, !noundef !{}
40+
%shl = shl i64 %arg0, %shift.amt
41+
ret i64 %shl
42+
}
43+
44+
; Known minimum is too low. Reduction must not be done.
45+
define i64 @shl_metadata_out_of_range(i64 %arg0, ptr %arg1.ptr) {
46+
; CHECK-LABEL: shl_metadata_out_of_range:
47+
; CHECK: ; %bb.0:
48+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49+
; CHECK-NEXT: flat_load_dword v2, v[2:3]
50+
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
51+
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
52+
; CHECK-NEXT: s_setpc_b64 s[30:31]
53+
%shift.amt = load i64, ptr %arg1.ptr, !range !2, !noundef !{}
54+
%shl = shl i64 %arg0, %shift.amt
55+
ret i64 %shl
56+
}
57+
58+
; Bounds cannot be truncated to i32 when load is narrowed to i32.
59+
; Reduction must not be done.
60+
; Bounds were chosen so that if bounds were truncated to i32 the
61+
; known minimum would be 32 and the shl would be erroneously reduced.
62+
define i64 @shl_metadata_cant_be_narrowed_to_i32(i64 %arg0, ptr %arg1.ptr) {
63+
; CHECK-LABEL: shl_metadata_cant_be_narrowed_to_i32:
64+
; CHECK: ; %bb.0:
65+
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466
; CHECK-NEXT: flat_load_dword v2, v[2:3]
2567
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2668
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
2769
; CHECK-NEXT: s_setpc_b64 s[30:31]
28-
%shift.amt = load i64, ptr %arg1.ptr, !range !0
70+
%shift.amt = load i64, ptr %arg1.ptr, !range !3, !noundef !{}
2971
%shl = shl i64 %arg0, %shift.amt
3072
ret i64 %shl
3173
}
3274

75+
; FIXME: This case should be reduced
3376
define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
3477
; CHECK-LABEL: shl_v2_metadata:
3578
; CHECK: ; %bb.0:
@@ -39,11 +82,12 @@ define <2 x i64> @shl_v2_metadata(<2 x i64> %arg0, ptr %arg1.ptr) {
3982
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
4083
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
4184
; CHECK-NEXT: s_setpc_b64 s[30:31]
42-
%shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0
85+
%shift.amt = load <2 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
4386
%shl = shl <2 x i64> %arg0, %shift.amt
4487
ret <2 x i64> %shl
4588
}
4689

90+
; FIXME: This case should be reduced
4791
define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
4892
; CHECK-LABEL: shl_v3_metadata:
4993
; CHECK: ; %bb.0:
@@ -55,11 +99,12 @@ define <3 x i64> @shl_v3_metadata(<3 x i64> %arg0, ptr %arg1.ptr) {
5599
; CHECK-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
56100
; CHECK-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
57101
; CHECK-NEXT: s_setpc_b64 s[30:31]
58-
%shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0
102+
%shift.amt = load <3 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
59103
%shl = shl <3 x i64> %arg0, %shift.amt
60104
ret <3 x i64> %shl
61105
}
62106

107+
; FIXME: This case should be reduced
63108
define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
64109
; CHECK-LABEL: shl_v4_metadata:
65110
; CHECK: ; %bb.0:
@@ -74,12 +119,15 @@ define <4 x i64> @shl_v4_metadata(<4 x i64> %arg0, ptr %arg1.ptr) {
74119
; CHECK-NEXT: v_lshlrev_b64 v[4:5], v13, v[4:5]
75120
; CHECK-NEXT: v_lshlrev_b64 v[6:7], v15, v[6:7]
76121
; CHECK-NEXT: s_setpc_b64 s[30:31]
77-
%shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0
122+
%shift.amt = load <4 x i64>, ptr %arg1.ptr, !range !0, !noundef !{}
78123
%shl = shl <4 x i64> %arg0, %shift.amt
79124
ret <4 x i64> %shl
80125
}
81126

82127
!0 = !{i64 32, i64 64}
128+
!1 = !{i64 32, i64 38, i64 42, i64 48}
129+
!2 = !{i64 31, i64 38, i64 42, i64 48}
130+
!3 = !{i64 32, i64 38, i64 2147483680, i64 2147483681}
83131

84132
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
85133
; Test range with an "or X, 16"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
;
4+
; This test case is reduced from RangeConstraintManager.cpp in an ASan build.
5+
; It crashes reduceLoadWidth in DAGCombiner.cpp. Preservation of range
6+
; metadata must ensure that ConstantRange truncation is strictly smaller.
7+
8+
define i8 @narrow_load_metadata(ptr %valptr) {
9+
; CHECK-LABEL: narrow_load_metadata:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: movzbl (%rdi), %eax
12+
; CHECK-NEXT: movb %al, 4(%rdi)
13+
; CHECK-NEXT: movl $0, (%rdi)
14+
; CHECK-NEXT: retq
15+
entry:
16+
%val = load i8, ptr %valptr, align 4, !range !0, !noundef !1
17+
%retval.sroa.1.0.insert.ext.i = zext i8 %val to i64
18+
%retval.sroa.1.0.insert.shift.i = shl i64 %retval.sroa.1.0.insert.ext.i, 32
19+
%coerce.val.ii = trunc i64 %retval.sroa.1.0.insert.shift.i to i40
20+
store i40 %coerce.val.ii, ptr %valptr, align 4
21+
ret i8 %val
22+
}
23+
24+
!0 = !{i8 0, i8 2}
25+
!1 = !{}

0 commit comments

Comments
 (0)