Skip to content

Commit e78aa8f

Browse files
committed
[SLP]Use the size of gathered scalars when evaluating slice size
The slice size must be computed from the size of the gathered scalars rather than the original size of the buildvector scalars, because the number of gathered scalars may change while the buildvector shuffles are being built. Fixes #125259
1 parent f354981 commit e78aa8f

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15094,7 +15094,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy,
1509415094
}
1509515095
}
1509615096
if (!GatherShuffles.empty()) {
15097-
unsigned SliceSize = getPartNumElems(E->Scalars.size(), NumParts);
15097+
unsigned SliceSize = getPartNumElems(GatheredScalars.size(), NumParts);
1509815098
SmallVector<int> VecMask(Mask.size(), PoisonMaskElem);
1509915099
for (const auto [I, TEs] : enumerate(Entries)) {
1510015100
if (TEs.empty()) {

llvm/test/Transforms/SLPVectorizer/X86/gathered-shuffle-resized.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ define ptr @test(ptr %0, ptr %args_gep) {
1818
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !noalias [[META0:![0-9]+]]
1919
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8, !noalias [[META0]]
2020
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
21-
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <16 x i32> poison
21+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
2222
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
23+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3>
2424
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <16 x i32> [[TMP13]], zeroinitializer
2525
; CHECK-NEXT: [[TMP15:%.*]] = zext <16 x i1> [[TMP14]] to <16 x i8>
2626
; CHECK-NEXT: store <16 x i8> [[TMP15]], ptr [[TMP5]], align 1

0 commit comments

Comments
 (0)