Skip to content

Commit a004c70

Browse files
authored
[PGO] Make the PGO instrumentation insert point after alloca (#142043)
We're changing PGO instrumentation to insert the intrinsic after alloca instructions. For sampled instrumentation, a conditional check is placed before the intrinsic. If this intrinsic comes before an alloca, the alloca (whose size might be unknown due to Phi node) becomes conditional, resulting in inefficient code. We have seen some stack overflows due to this. This patch guarantees the intrinsic is always after the alloca.
1 parent 9bf6b2a commit a004c70

File tree

3 files changed

+61
-4
lines changed

3 files changed

+61
-4
lines changed

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -855,7 +855,7 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
855855
auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
856856
// There are basic blocks (such as catchswitch) cannot be instrumented.
857857
// If the returned first insertion point is the end of BB, skip this BB.
858-
if (BB->getFirstInsertionPt() == BB->end())
858+
if (BB->getFirstNonPHIOrDbgOrAlloca() == BB->end())
859859
return nullptr;
860860
return BB;
861861
};
@@ -952,7 +952,7 @@ void FunctionInstrumenter::instrument() {
952952
Name, PointerType::get(M.getContext(), 0));
953953
if (PGOFunctionEntryCoverage) {
954954
auto &EntryBB = F.getEntryBlock();
955-
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
955+
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
956956
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
957957
// i32 <index>)
958958
Builder.CreateIntrinsic(
@@ -1010,7 +1010,7 @@ void FunctionInstrumenter::instrument() {
10101010
if (PGOTemporalInstrumentation) {
10111011
NumCounters += PGOBlockCoverage ? 8 : 1;
10121012
auto &EntryBB = F.getEntryBlock();
1013-
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
1013+
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstNonPHIOrDbgOrAlloca());
10141014
// llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
10151015
// i32 <index>)
10161016
Builder.CreateIntrinsic(Intrinsic::instrprof_timestamp,
@@ -1021,7 +1021,7 @@ void FunctionInstrumenter::instrument() {
10211021
}
10221022

10231023
for (auto *InstrBB : InstrumentBBs) {
1024-
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
1024+
IRBuilder<> Builder(InstrBB, InstrBB->getFirstNonPHIOrDbgOrAlloca());
10251025
assert(Builder.GetInsertPoint() != InstrBB->end() &&
10261026
"Cannot get the Instrumentation point");
10271027
// llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; Note: Make sure that instrumention intrinsic is after entry alloca.
2+
; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
3+
; RUN: opt < %s -passes=pgo-instr-gen,instrprof -sampled-instrumentation -S | FileCheck %s --check-prefixes=SAMPLE
4+
5+
%struct.A = type { i32, [0 x i32] }
6+
%struct.B = type { i32, [0 x double] }
7+
8+
; CHECK-LABEL: @foo()
9+
; CHECK-NEXT: %1 = alloca %struct.A
10+
; CHECK-NEXT: %2 = alloca %struct.B
11+
; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo
12+
13+
; SAMPLE: @foo()
14+
; SAMPLE-NEXT: %1 = alloca %struct.A
15+
; SAMPLE-NEXT: %2 = alloca %struct.B
16+
; SAMPLE-NEXT: %[[v:[0-9]+]] = load i16, ptr @__llvm_profile_sampling
17+
; SAMPLE-NEXT: {{.*}} = icmp ule i16 %[[v]], 199
18+
19+
define dso_local double @foo() {
20+
%1 = alloca %struct.A, align 4
21+
%2 = alloca %struct.B, align 8
22+
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1)
23+
call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %2)
24+
call void @bar(ptr noundef nonnull %1, ptr noundef nonnull %2)
25+
%3 = load i32, ptr %1, align 4
26+
%4 = icmp sgt i32 %3, 0
27+
br i1 %4, label %5, label %21
28+
29+
5:
30+
%6 = getelementptr inbounds i8, ptr %1, i64 4
31+
%7 = getelementptr inbounds i8, ptr %2, i64 8
32+
%8 = zext nneg i32 %3 to i64
33+
br label %9
34+
35+
9:
36+
%10 = phi i64 [ 0, %5 ], [ %19, %9 ]
37+
%11 = phi double [ 0.000000e+00, %5 ], [ %18, %9 ]
38+
%12 = getelementptr inbounds [0 x i32], ptr %6, i64 0, i64 %10
39+
%13 = load i32, ptr %12, align 4
40+
%14 = sitofp i32 %13 to double
41+
%15 = getelementptr inbounds [0 x double], ptr %7, i64 0, i64 %10
42+
%16 = load double, ptr %15, align 8
43+
%17 = fadd double %16, %14
44+
%18 = fadd double %11, %17
45+
%19 = add nuw nsw i64 %10, 1
46+
%20 = icmp eq i64 %19, %8
47+
br i1 %20, label %21, label %9
48+
49+
21:
50+
%22 = phi double [ 0.000000e+00, %0 ], [ %18, %9 ]
51+
call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %2)
52+
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1)
53+
ret double %22
54+
}
55+
56+
declare void @bar(ptr noundef, ptr noundef)

llvm/test/Transforms/PGOProfile/split-indirectbr-critical-edges.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ if.end: ; preds = %if.end.preheader, %
4242
;; The edge will not be profiled.
4343
; CHECK-LABEL: @cannot_split(
4444
; CHECK-NEXT: entry:
45+
; CHECK-NEXT: %targets = alloca <2 x ptr>, align 16
4546
; CHECK-NEXT: call void @llvm.instrprof.increment
4647
; CHECK: indirect:
4748
; CHECK-NOT: call void @llvm.instrprof.increment

0 commit comments

Comments
 (0)