Skip to content

Commit 56140a8

Browse files
committed
[SLP]Fix PR104422: Wrong value truncation
The minbitwidth restrictions can be skipped only for immediately reduced values (the root node of the reduction); for all other nodes, we still need to check whether the external users allow bitwidth reduction. Fixes #104422
1 parent 13a6a79 commit 56140a8

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15663,7 +15663,8 @@ bool BoUpSLP::collectValuesToDemote(
1566315663
if (any_of(E.Scalars, [&](Value *V) {
1566415664
return !all_of(V->users(), [=](User *U) {
1566515665
return getTreeEntry(U) ||
15666-
(UserIgnoreList && UserIgnoreList->contains(U)) ||
15666+
(E.Idx == 0 && UserIgnoreList &&
15667+
UserIgnoreList->contains(U)) ||
1566715668
(!isa<CmpInst>(U) && U->getType()->isSized() &&
1566815669
!U->getType()->isScalableTy() &&
1566915670
DL->getTypeSizeInBits(U->getType()) <= BitWidth);

llvm/test/Transforms/SLPVectorizer/X86/operand-is-reduced-val.ll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@ define i64 @src(i32 %a) {
88
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[A]] to i64
99
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
1010
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
11-
; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
12-
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i32> [[TMP3]] to <4 x i64>
13-
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
14-
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[TMP5]] to <4 x i64>
11+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
12+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], <i64 4294967297, i64 4294967297, i64 4294967297, i64 4294967297>
13+
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 1, i64 1, i64 1, i64 1>
1514
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
1615
; CHECK-NEXT: [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
17-
; CHECK-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], [[TMP16]]
18-
; CHECK-NEXT: [[OP_RDX1:%.*]] = add i64 [[TMP19]], 4294967297
19-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[OP_RDX1]], [[TMP17]]
16+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0
17+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP18]], i32 1
18+
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> <i64 poison, i64 4294967297>, i64 [[TMP17]], i32 0
19+
; CHECK-NEXT: [[TMP11:%.*]] = add <2 x i64> [[TMP9]], [[TMP10]]
20+
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
21+
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
22+
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], [[TMP13]]
2023
; CHECK-NEXT: ret i64 [[TMP21]]
2124
;
2225
entry:

0 commit comments

Comments
 (0)