Skip to content

Commit 88e9b37

Browse files
authored
[FuncSpec] Query SCCPSolver in more places (#114964)
When traversing the use-def chain of an Argument in a candidate specialization, also query the SCCPSolver to see if a Value is constant. This lets us better estimate the codesize savings of a candidate when an instruction that uses the argument under consideration also uses other arguments that IPSCCP has already found to be constant. Similarly, when estimating the dead basic blocks that result from branch and switch instructions whose conditions become constant, also query the SCCPSolver to see if a predecessor is unreachable.
1 parent 28452ac commit 88e9b37

File tree

4 files changed

+165
-24
lines changed

4 files changed

+165
-24
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
176176
SCCPSolver &Solver)
177177
: GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {}
178178

179-
bool isBlockExecutable(BasicBlock *BB) {
179+
bool isBlockExecutable(BasicBlock *BB) const {
180180
return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
181181
}
182182

@@ -189,8 +189,9 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
189189
private:
190190
friend class InstVisitor<InstCostVisitor, Constant *>;
191191

192-
static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
193-
DenseSet<BasicBlock *> &DeadBlocks);
192+
Constant *findConstantFor(Value *V) const;
193+
194+
bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ) const;
194195

195196
Cost getCodeSizeSavingsForUser(Instruction *User, Value *Use = nullptr,
196197
Constant *C = nullptr);

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,12 @@ static cl::opt<bool> SpecializeLiteralConstant(
9090
"Enable specialization of functions that take a literal constant as an "
9191
"argument"));
9292

93-
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
94-
DenseSet<BasicBlock *> &DeadBlocks) {
93+
bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB,
94+
BasicBlock *Succ) const {
9595
unsigned I = 0;
96-
return all_of(predecessors(Succ),
97-
[&I, BB, Succ, &DeadBlocks] (BasicBlock *Pred) {
96+
return all_of(predecessors(Succ), [&I, BB, Succ, this](BasicBlock *Pred) {
9897
return I++ < MaxBlockPredecessors &&
99-
(Pred == BB || Pred == Succ || DeadBlocks.contains(Pred));
98+
(Pred == BB || Pred == Succ || !isBlockExecutable(Pred));
10099
});
101100
}
102101

@@ -116,6 +115,7 @@ Cost InstCostVisitor::estimateBasicBlocks(
116115
// These blocks are considered dead as far as the InstCostVisitor
117116
// is concerned. They haven't been proven dead yet by the Solver,
118117
// but may become if we propagate the specialization arguments.
118+
assert(Solver.isBlockExecutable(BB) && "BB already found dead by IPSCCP!");
119119
if (!DeadBlocks.insert(BB).second)
120120
continue;
121121

@@ -134,16 +134,17 @@ Cost InstCostVisitor::estimateBasicBlocks(
134134
// Keep adding dead successors to the list as long as they are
135135
// executable and only reachable from dead blocks.
136136
for (BasicBlock *SuccBB : successors(BB))
137-
if (isBlockExecutable(SuccBB) &&
138-
canEliminateSuccessor(BB, SuccBB, DeadBlocks))
137+
if (isBlockExecutable(SuccBB) && canEliminateSuccessor(BB, SuccBB))
139138
WorkList.push_back(SuccBB);
140139
}
141140
return CodeSize;
142141
}
143142

144-
static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
143+
Constant *InstCostVisitor::findConstantFor(Value *V) const {
145144
if (auto *C = dyn_cast<Constant>(V))
146145
return C;
146+
if (auto *C = Solver.getConstantOrNull(V))
147+
return C;
147148
return KnownConstants.lookup(V);
148149
}
149150

@@ -266,7 +267,7 @@ Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
266267
for (const auto &Case : I.cases()) {
267268
BasicBlock *BB = Case.getCaseSuccessor();
268269
if (BB != Succ && isBlockExecutable(BB) &&
269-
canEliminateSuccessor(I.getParent(), BB, DeadBlocks))
270+
canEliminateSuccessor(I.getParent(), BB))
270271
WorkList.push_back(BB);
271272
}
272273

@@ -283,8 +284,7 @@ Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
283284
// Initialize the worklist with the dead successor as long as
284285
// it is executable and has a unique predecessor.
285286
SmallVector<BasicBlock *> WorkList;
286-
if (isBlockExecutable(Succ) &&
287-
canEliminateSuccessor(I.getParent(), Succ, DeadBlocks))
287+
if (isBlockExecutable(Succ) && canEliminateSuccessor(I.getParent(), Succ))
288288
WorkList.push_back(Succ);
289289

290290
return estimateBasicBlocks(WorkList);
@@ -312,10 +312,10 @@ bool InstCostVisitor::discoverTransitivelyIncomingValues(
312312

313313
// Disregard self-references and dead incoming values.
314314
if (auto *Inst = dyn_cast<Instruction>(V))
315-
if (Inst == PN || DeadBlocks.contains(PN->getIncomingBlock(I)))
315+
if (Inst == PN || !isBlockExecutable(PN->getIncomingBlock(I)))
316316
continue;
317317

318-
if (Constant *C = findConstantFor(V, KnownConstants)) {
318+
if (Constant *C = findConstantFor(V)) {
319319
// Not all incoming values are the same constant. Bail immediately.
320320
if (C != Const)
321321
return false;
@@ -347,10 +347,10 @@ Constant *InstCostVisitor::visitPHINode(PHINode &I) {
347347

348348
// Disregard self-references and dead incoming values.
349349
if (auto *Inst = dyn_cast<Instruction>(V))
350-
if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
350+
if (Inst == &I || !isBlockExecutable(I.getIncomingBlock(Idx)))
351351
continue;
352352

353-
if (Constant *C = findConstantFor(V, KnownConstants)) {
353+
if (Constant *C = findConstantFor(V)) {
354354
if (!Const)
355355
Const = C;
356356
// Not all incoming values are the same constant. Bail immediately.
@@ -415,7 +415,7 @@ Constant *InstCostVisitor::visitCallBase(CallBase &I) {
415415

416416
for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {
417417
Value *V = I.getOperand(Idx);
418-
Constant *C = findConstantFor(V, KnownConstants);
418+
Constant *C = findConstantFor(V);
419419
if (!C)
420420
return nullptr;
421421
Operands.push_back(C);
@@ -439,7 +439,7 @@ Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
439439

440440
for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
441441
Value *V = I.getOperand(Idx);
442-
Constant *C = findConstantFor(V, KnownConstants);
442+
Constant *C = findConstantFor(V);
443443
if (!C)
444444
return nullptr;
445445
Operands.push_back(C);
@@ -455,9 +455,9 @@ Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
455455
if (I.getCondition() == LastVisited->first) {
456456
Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
457457
: I.getTrueValue();
458-
return findConstantFor(V, KnownConstants);
458+
return findConstantFor(V);
459459
}
460-
if (Constant *Condition = findConstantFor(I.getCondition(), KnownConstants))
460+
if (Constant *Condition = findConstantFor(I.getCondition()))
461461
if ((I.getTrueValue() == LastVisited->first && Condition->isOneValue()) ||
462462
(I.getFalseValue() == LastVisited->first && Condition->isZeroValue()))
463463
return LastVisited->second;
@@ -475,7 +475,7 @@ Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
475475
Constant *Const = LastVisited->second;
476476
bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
477477
Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
478-
Constant *Other = findConstantFor(V, KnownConstants);
478+
Constant *Other = findConstantFor(V);
479479

480480
if (Other) {
481481
if (ConstOnRHS)
@@ -503,7 +503,7 @@ Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
503503

504504
bool ConstOnRHS = I.getOperand(1) == LastVisited->first;
505505
Value *V = ConstOnRHS ? I.getOperand(0) : I.getOperand(1);
506-
Constant *Other = findConstantFor(V, KnownConstants);
506+
Constant *Other = findConstantFor(V);
507507
Value *OtherVal = Other ? Other : V;
508508
Value *ConstVal = LastVisited->second;
509509

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
3+
; RUN: -funcspec-for-literal-constant=true \
4+
; RUN: -funcspec-min-codesize-savings=50 \
5+
; RUN: -funcspec-min-latency-savings=0 \
6+
; RUN: -S < %s | FileCheck %s
7+
8+
; Verify that we are able to estimate the codesize savings arising from a branch
9+
; based on a binary operator, where one operand is already found constant by
10+
; IPSCCP.
11+
define i32 @main(i1 %flag) {
12+
%notspec = call i32 @test(i1 %flag, i1 false)
13+
%spec = call i32 @test(i1 false, i1 false)
14+
%sum = add i32 %notspec, %spec
15+
ret i32 %sum
16+
}
17+
18+
define internal i32 @test(i1 %argflag, i1 %constflag) {
19+
entry:
20+
%cond = or i1 %argflag, %constflag
21+
br i1 %cond, label %if.then, label %if.end
22+
23+
if.then:
24+
call void @do_something()
25+
call void @do_something()
26+
call void @do_something()
27+
call void @do_something()
28+
br label %if.end
29+
30+
if.end:
31+
%res = phi i32 [ 0, %entry ], [ 1, %if.then]
32+
ret i32 %res
33+
}
34+
35+
declare void @do_something()
36+
; CHECK-LABEL: define range(i32 0, 2) i32 @main(
37+
; CHECK-SAME: i1 [[FLAG:%.*]]) {
38+
; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @test(i1 [[FLAG]], i1 false)
39+
; CHECK-NEXT: [[SPEC:%.*]] = call i32 @test.specialized.1(i1 false, i1 false)
40+
; CHECK-NEXT: [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
41+
; CHECK-NEXT: ret i32 [[SUM]]
42+
;
43+
;
44+
; CHECK-LABEL: define internal range(i32 0, 2) i32 @test(
45+
; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
46+
; CHECK-NEXT: [[ENTRY:.*]]:
47+
; CHECK-NEXT: [[COND:%.*]] = or i1 [[ARGFLAG]], false
48+
; CHECK-NEXT: br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
49+
; CHECK: [[IF_THEN]]:
50+
; CHECK-NEXT: call void @do_something()
51+
; CHECK-NEXT: call void @do_something()
52+
; CHECK-NEXT: call void @do_something()
53+
; CHECK-NEXT: call void @do_something()
54+
; CHECK-NEXT: br label %[[IF_END]]
55+
; CHECK: [[IF_END]]:
56+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 1, %[[IF_THEN]] ]
57+
; CHECK-NEXT: ret i32 [[RES]]
58+
;
59+
;
60+
; CHECK-LABEL: define internal i32 @test.specialized.1(
61+
; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
62+
; CHECK-NEXT: [[ENTRY:.*:]]
63+
; CHECK-NEXT: br label %[[IF_END:.*]]
64+
; CHECK: [[IF_END]]:
65+
; CHECK-NEXT: ret i32 poison
66+
;
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
2+
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1 \
3+
; RUN: -funcspec-for-literal-constant=true \
4+
; RUN: -funcspec-min-codesize-savings=50 \
5+
; RUN: -funcspec-min-latency-savings=0 \
6+
; RUN: -S < %s | FileCheck %s
7+
8+
; Verify that we are able to estimate the codesize savings arising from a block
9+
; which is found dead, where the block has a predecessor that was found dead by
10+
; IPSCCP.
11+
define i32 @main(i1 %flag) {
12+
%notspec = call i32 @test(i1 %flag, i1 true)
13+
%spec = call i32 @test(i1 true, i1 true)
14+
%sum = add i32 %notspec, %spec
15+
ret i32 %sum
16+
}
17+
18+
define internal i32 @test(i1 %argflag, i1 %constflag) {
19+
entry:
20+
br i1 %argflag, label %block1, label %block3
21+
22+
block1:
23+
br i1 %constflag, label %end, label %block2
24+
25+
block2:
26+
br label %block3
27+
28+
block3:
29+
call void @do_something()
30+
call void @do_something()
31+
call void @do_something()
32+
call void @do_something()
33+
br label %end
34+
35+
end:
36+
%res = phi i32 [ 0, %block1 ], [ 1, %block3]
37+
ret i32 %res
38+
}
39+
40+
declare void @do_something()
41+
; CHECK-LABEL: define range(i32 0, 2) i32 @main(
42+
; CHECK-SAME: i1 [[FLAG:%.*]]) {
43+
; CHECK-NEXT: [[NOTSPEC:%.*]] = call i32 @test(i1 [[FLAG]], i1 true)
44+
; CHECK-NEXT: [[SPEC:%.*]] = call i32 @test.specialized.1(i1 true, i1 true)
45+
; CHECK-NEXT: [[SUM:%.*]] = add nuw nsw i32 [[NOTSPEC]], 0
46+
; CHECK-NEXT: ret i32 [[SUM]]
47+
;
48+
;
49+
; CHECK-LABEL: define internal range(i32 0, 2) i32 @test(
50+
; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
51+
; CHECK-NEXT: [[ENTRY:.*:]]
52+
; CHECK-NEXT: br i1 [[ARGFLAG]], label %[[BLOCK1:.*]], label %[[BLOCK3:.*]]
53+
; CHECK: [[BLOCK1]]:
54+
; CHECK-NEXT: br label %[[END:.*]]
55+
; CHECK: [[BLOCK3]]:
56+
; CHECK-NEXT: call void @do_something()
57+
; CHECK-NEXT: call void @do_something()
58+
; CHECK-NEXT: call void @do_something()
59+
; CHECK-NEXT: call void @do_something()
60+
; CHECK-NEXT: br label %[[END]]
61+
; CHECK: [[END]]:
62+
; CHECK-NEXT: [[RES:%.*]] = phi i32 [ 0, %[[BLOCK1]] ], [ 1, %[[BLOCK3]] ]
63+
; CHECK-NEXT: ret i32 [[RES]]
64+
;
65+
;
66+
; CHECK-LABEL: define internal i32 @test.specialized.1(
67+
; CHECK-SAME: i1 [[ARGFLAG:%.*]], i1 [[CONSTFLAG:%.*]]) {
68+
; CHECK-NEXT: [[ENTRY:.*:]]
69+
; CHECK-NEXT: br label %[[BLOCK1:.*]]
70+
; CHECK: [[BLOCK1]]:
71+
; CHECK-NEXT: br label %[[END:.*]]
72+
; CHECK: [[END]]:
73+
; CHECK-NEXT: ret i32 poison
74+
;

0 commit comments

Comments
 (0)