Skip to content

Commit ac217ee

Browse files
committed
[SLP] Check for PHI nodes (potentially cycles!) when checking dependencies
When checking dependencies for gather nodes whose users share the same last instruction, we cannot rely on the index order if there is a cycle (even a potential one!) in the graph: the ordering may not work correctly and may cause a compiler crash. Fixes #127128
1 parent 1ff5f32 commit ac217ee

File tree

4 files changed

+70
-3
lines changed

4 files changed

+70
-3
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13206,8 +13206,16 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1320613206
continue;
1320713207
// If the user instruction is used for some reason in different
1320813208
// vectorized nodes - make it depend on index.
13209+
// If any vector node is a PHI node, this dependency might not work
13210+
// because of cycle dependencies, so disable it.
1320913211
if (TEUseEI.UserTE != UseEI.UserTE &&
13210-
TEUseEI.UserTE->Idx < UseEI.UserTE->Idx)
13212+
(TEUseEI.UserTE->Idx < UseEI.UserTE->Idx ||
13213+
any_of(
13214+
VectorizableTree,
13215+
[](const std::unique_ptr<TreeEntry> &TE) {
13216+
return TE->State == TreeEntry::Vectorize &&
13217+
TE->getOpcode() == Instruction::PHI;
13218+
})))
1321113219
continue;
1321213220
}
1321313221

llvm/test/Transforms/SLPVectorizer/X86/delayed-gather-emission.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @test() {
3131
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[I2]], 0.000000e+00
3232
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <2 x i32> <i32 poison, i32 0>
3333
; CHECK-NEXT: [[TMP9]] = insertelement <2 x float> [[TMP8]], float [[I2]], i32 0
34-
; CHECK-NEXT: [[TMP10]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
34+
; CHECK-NEXT: [[TMP10]] = insertelement <2 x float> [[TMP2]], float [[I2]], i32 0
3535
; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB1]], label [[BB2]]
3636
;
3737
entry:

llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
; YAML: Function: test
99
; YAML: Args:
1010
; YAML: - String: 'Stores SLP vectorized with cost '
11-
; YAML: - Cost: '-6'
11+
; YAML: - Cost: '-3'
1212
; YAML: - String: ' and with tree size '
1313
; YAML: - TreeSize: '14'
1414
; YAML: ...
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell < %s | FileCheck %s
3+
4+
define void @test(float %0) {
5+
; CHECK-LABEL: define void @test(
6+
; CHECK-SAME: float [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0]], i32 1
8+
; CHECK-NEXT: [[TMP3:%.*]] = fdiv <2 x float> [[TMP2]], zeroinitializer
9+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP0]], i32 0
10+
; CHECK-NEXT: [[TMP5:%.*]] = fdiv <2 x float> [[TMP4]], zeroinitializer
11+
; CHECK-NEXT: br label %[[BB6:.*]]
12+
; CHECK: [[BB6]]:
13+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
14+
; CHECK-NEXT: [[TMP8:%.*]] = fsub <2 x float> zeroinitializer, [[TMP7]]
15+
; CHECK-NEXT: br label %[[BB10:.*]]
16+
; CHECK: [[BB9:.*]]:
17+
; CHECK-NEXT: br label %[[BB10]]
18+
; CHECK: [[BB10]]:
19+
; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x float> [ [[TMP8]], %[[BB6]] ], [ poison, %[[BB9]] ]
20+
; CHECK-NEXT: br label %[[BB12:.*]]
21+
; CHECK: [[BB12]]:
22+
; CHECK-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP3]], zeroinitializer
23+
; CHECK-NEXT: [[TMP14:%.*]] = fsub <2 x float> [[TMP11]], [[TMP13]]
24+
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[TMP14]], i32 0
25+
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP14]], i32 1
26+
; CHECK-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
27+
; CHECK-NEXT: [[TMP18:%.*]] = call float @llvm.fabs.f32(float [[TMP17]])
28+
; CHECK-NEXT: ret void
29+
;
30+
%2 = fdiv float 0.000000e+00, 0.000000e+00
31+
%3 = fdiv float 0.000000e+00, 0.000000e+00
32+
%4 = fdiv float %0, 0.000000e+00
33+
br label %5
34+
35+
5:
36+
%6 = fmul float %4, 0.000000e+00
37+
%7 = fsub float 0.000000e+00, %6
38+
%8 = fmul float %3, 0.000000e+00
39+
%9 = fsub float 0.000000e+00, %8
40+
br label %11
41+
42+
10:
43+
br label %11
44+
45+
11:
46+
%12 = phi float [ %7, %5 ], [ 0.000000e+00, %10 ]
47+
%13 = phi float [ %9, %5 ], [ 0.000000e+00, %10 ]
48+
br label %14
49+
50+
14:
51+
%15 = fmul float %2, 0.000000e+00
52+
%16 = fsub float %12, %15
53+
%17 = fmul float %4, 0.000000e+00
54+
%18 = fsub float %13, %17
55+
%19 = fadd float %16, %18
56+
%20 = call float @llvm.fabs.f32(float %19)
57+
ret void
58+
}
59+

0 commit comments

Comments
 (0)