Skip to content

Commit e52a811

Browse files
committed
[PGO] Add ability to mark cold functions as optsize/minsize/optnone
The performance of cold functions shouldn't matter too much, so if we care about binary sizes, add an option to mark cold functions as optsize/minsize for binary size, or optnone for compile times [1]. The Clang patch will follow in a future patch.
Initial version: https://reviews.llvm.org/D149800
[1] https://discourse.llvm.org/t/rfc-new-feature-proposal-de-optimizing-cold-functions-using-pgo-info/56388
1 parent 17bc449 commit e52a811

File tree

13 files changed

+252
-18
lines changed

13 files changed

+252
-18
lines changed

clang/lib/CodeGen/BackendUtil.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -746,7 +746,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
746746
CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName()
747747
: CodeGenOpts.InstrProfileOutput,
748748
"", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr,
749-
PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
749+
PGOOptions::NoCSAction, PGOOptions::ColdFuncAttr::None,
750+
CodeGenOpts.DebugInfoForProfiling,
750751
/*PseudoProbeForProfiling=*/false, CodeGenOpts.AtomicProfileUpdate);
751752
else if (CodeGenOpts.hasProfileIRUse()) {
752753
// -fprofile-use.
@@ -755,28 +756,32 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
755756
PGOOpt = PGOOptions(
756757
CodeGenOpts.ProfileInstrumentUsePath, "",
757758
CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS,
758-
PGOOptions::IRUse, CSAction, CodeGenOpts.DebugInfoForProfiling);
759+
PGOOptions::IRUse, CSAction, PGOOptions::ColdFuncAttr::None,
760+
CodeGenOpts.DebugInfoForProfiling);
759761
} else if (!CodeGenOpts.SampleProfileFile.empty())
760762
// -fprofile-sample-use
761763
PGOOpt = PGOOptions(
762764
CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile,
763765
CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse,
764-
PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
765-
CodeGenOpts.PseudoProbeForProfiling);
766+
PGOOptions::NoCSAction, PGOOptions::ColdFuncAttr::None,
767+
CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling);
766768
else if (!CodeGenOpts.MemoryProfileUsePath.empty())
767769
// -fmemory-profile-use (without any of the above options)
768770
PGOOpt = PGOOptions("", "", "", CodeGenOpts.MemoryProfileUsePath, VFS,
769771
PGOOptions::NoAction, PGOOptions::NoCSAction,
772+
PGOOptions::ColdFuncAttr::None,
770773
CodeGenOpts.DebugInfoForProfiling);
771774
else if (CodeGenOpts.PseudoProbeForProfiling)
772775
// -fpseudo-probe-for-profiling
773776
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
774777
PGOOptions::NoAction, PGOOptions::NoCSAction,
778+
PGOOptions::ColdFuncAttr::None,
775779
CodeGenOpts.DebugInfoForProfiling, true);
776780
else if (CodeGenOpts.DebugInfoForProfiling)
777781
// -fdebug-info-for-profiling
778782
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
779-
PGOOptions::NoAction, PGOOptions::NoCSAction, true);
783+
PGOOptions::NoAction, PGOOptions::NoCSAction,
784+
PGOOptions::ColdFuncAttr::None, true);
780785

781786
// Check to see if we want to generate a CS profile.
782787
if (CodeGenOpts.hasProfileCSIRInstr()) {
@@ -799,7 +804,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
799804
? getDefaultProfileGenName()
800805
: CodeGenOpts.InstrProfileOutput,
801806
"", /*MemoryProfile=*/"", nullptr, PGOOptions::NoAction,
802-
PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling);
807+
PGOOptions::CSIRInstr, PGOOptions::ColdFuncAttr::None,
808+
CodeGenOpts.DebugInfoForProfiling);
803809
}
804810
if (TM)
805811
TM->setPGOOption(PGOOpt);

llvm/include/llvm/Support/PGOOptions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ class FileSystem;
2727
struct PGOOptions {
2828
enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
2929
enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
30+
enum class ColdFuncAttr { None, OptSize, MinSize, OptNone };
3031
PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
3132
std::string ProfileRemappingFile, std::string MemoryProfile,
3233
IntrusiveRefCntPtr<vfs::FileSystem> FS,
3334
PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction,
35+
ColdFuncAttr ColdType = ColdFuncAttr::None,
3436
bool DebugInfoForProfiling = false,
3537
bool PseudoProbeForProfiling = false,
3638
bool AtomicCounterUpdate = false);
@@ -44,6 +46,7 @@ struct PGOOptions {
4446
std::string MemoryProfile;
4547
PGOAction Action;
4648
CSPGOAction CSAction;
49+
ColdFuncAttr ColdType;
4750
bool DebugInfoForProfiling;
4851
bool PseudoProbeForProfiling;
4952
bool AtomicCounterUpdate;
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===- MarkColdFunctions.h - ------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MARKCOLDFUNCTIONS_H
10+
#define LLVM_TRANSFORMS_INSTRUMENTATION_MARKCOLDFUNCTIONS_H
11+
12+
#include "llvm/IR/PassManager.h"
13+
#include "llvm/Support/PGOOptions.h"
14+
15+
namespace llvm {
16+
17+
/// Module pass that tags functions the profile reports as cold with an
/// optimization attribute (optsize, minsize or optnone), trading their
/// performance for binary size or compile time.
///
/// The attribute to apply is chosen by the PGOOptions::ColdFuncAttr value
/// given at construction; PGOOptions::ColdFuncAttr::None makes the pass a
/// no-op.
struct MarkColdFunctionsPass : public PassInfoMixin<MarkColdFunctionsPass> {
  /// \param ColdType attribute to put on cold functions. The constructor is
  /// explicit so that a ColdFuncAttr value is never silently converted into
  /// a pass object.
  explicit MarkColdFunctionsPass(PGOOptions::ColdFuncAttr ColdType)
      : ColdType(ColdType) {}

  /// Runs the pass over \p M.
  /// \returns PreservedAnalyses::all() when no function was modified,
  /// PreservedAnalyses::none() otherwise.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

private:
  /// Attribute applied to cold functions by run().
  PGOOptions::ColdFuncAttr ColdType;
};
25+
26+
} // namespace llvm
27+
28+
#endif // LLVM_TRANSFORMS_INSTRUMENTATION_MARKCOLDFUNCTIONS_H

llvm/lib/LTO/LTOBackend.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -243,19 +243,23 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
243243
if (!Conf.SampleProfile.empty())
244244
PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
245245
/*MemoryProfile=*/"", FS, PGOOptions::SampleUse,
246-
PGOOptions::NoCSAction, true);
246+
PGOOptions::NoCSAction, PGOOptions::ColdFuncAttr::None,
247+
true);
247248
else if (Conf.RunCSIRInstr) {
248249
PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
249250
/*MemoryProfile=*/"", FS, PGOOptions::IRUse,
250-
PGOOptions::CSIRInstr, Conf.AddFSDiscriminator);
251+
PGOOptions::CSIRInstr, PGOOptions::ColdFuncAttr::None,
252+
Conf.AddFSDiscriminator);
251253
} else if (!Conf.CSIRProfile.empty()) {
252254
PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
253255
/*MemoryProfile=*/"", FS, PGOOptions::IRUse,
254-
PGOOptions::CSIRUse, Conf.AddFSDiscriminator);
256+
PGOOptions::CSIRUse, PGOOptions::ColdFuncAttr::None,
257+
Conf.AddFSDiscriminator);
255258
NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
256259
} else if (Conf.AddFSDiscriminator) {
257260
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
258-
PGOOptions::NoAction, PGOOptions::NoCSAction, true);
261+
PGOOptions::NoAction, PGOOptions::NoCSAction,
262+
PGOOptions::ColdFuncAttr::None, true);
259263
}
260264
TM->setPGOOption(PGOOpt);
261265

llvm/lib/Passes/PassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@
167167
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
168168
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
169169
#include "llvm/Transforms/Instrumentation/KCFI.h"
170+
#include "llvm/Transforms/Instrumentation/MarkColdFunctions.h"
170171
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
171172
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
172173
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
7474
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
7575
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
76+
#include "llvm/Transforms/Instrumentation/MarkColdFunctions.h"
7677
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
7778
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
7879
#include "llvm/Transforms/Scalar/ADCE.h"
@@ -212,6 +213,12 @@ static cl::opt<bool>
212213
cl::desc("Enable DFA jump threading"),
213214
cl::init(false), cl::Hidden);
214215

216+
// Opt-in switch for MarkColdFunctionsPass. The module simplification pipeline
// only schedules the pass when this flag is set (and a sample/IR profile is in
// use); it defaults to off.
// TODO: turn on and remove flag
217+
static cl::opt<bool>
218+
EnableMarkColdFunctions("enable-mark-cold-functions",
219+
cl::desc("Enable pass to mark cold functions"),
220+
cl::init(false));
221+
215222
static cl::opt<bool>
216223
EnableHotColdSplit("hot-cold-split",
217224
cl::desc("Enable hot-cold splitting pass"));
@@ -1137,6 +1144,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
11371144
if (EnableSyntheticCounts && !PGOOpt)
11381145
MPM.addPass(SyntheticCountsPropagation());
11391146

1147+
if (EnableMarkColdFunctions && PGOOpt &&
1148+
(PGOOpt->Action == PGOOptions::SampleUse ||
1149+
PGOOpt->Action == PGOOptions::IRUse))
1150+
MPM.addPass(MarkColdFunctionsPass(PGOOpt->ColdType));
1151+
11401152
MPM.addPass(AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
11411153

11421154
if (EnableModuleInliner)

llvm/lib/Passes/PassRegistry.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ MODULE_PASS("lower-emutls", LowerEmuTLSPass())
8686
MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
8787
MODULE_PASS("lower-ifunc", LowerIFuncPass())
8888
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
89+
MODULE_PASS("mark-cold-functions", MarkColdFunctionsPass(PGOOpt ? PGOOpt->ColdType : PGOOptions::ColdFuncAttr::None))
8990
MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
9091
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
9192
MODULE_PASS("mergefunc", MergeFunctionsPass())

llvm/lib/Support/PGOOptions.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,12 @@ PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
1515
std::string ProfileRemappingFile,
1616
std::string MemoryProfile,
1717
IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action,
18-
CSPGOAction CSAction, bool DebugInfoForProfiling,
19-
bool PseudoProbeForProfiling, bool AtomicCounterUpdate)
18+
CSPGOAction CSAction, ColdFuncAttr ColdType,
19+
bool DebugInfoForProfiling, bool PseudoProbeForProfiling,
20+
bool AtomicCounterUpdate)
2021
: ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
2122
ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile),
22-
Action(Action), CSAction(CSAction),
23+
Action(Action), CSAction(CSAction), ColdType(ColdType),
2324
DebugInfoForProfiling(DebugInfoForProfiling ||
2425
(Action == SampleUse && !PseudoProbeForProfiling)),
2526
PseudoProbeForProfiling(PseudoProbeForProfiling),

llvm/lib/Transforms/Instrumentation/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMInstrumentation
66
DataFlowSanitizer.cpp
77
GCOVProfiling.cpp
88
BlockCoverageInference.cpp
9+
MarkColdFunctions.cpp
910
MemProfiler.cpp
1011
MemorySanitizer.cpp
1112
IndirectCallPromotion.cpp
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "llvm/Transforms/Instrumentation/MarkColdFunctions.h"
10+
#include "llvm/Analysis/BlockFrequencyInfo.h"
11+
#include "llvm/Analysis/ProfileSummaryInfo.h"
12+
#include "llvm/IR/PassManager.h"
13+
14+
using namespace llvm;
15+
16+
/// Marks every function definition in \p M that the profile summary considers
/// cold with the attribute selected by ColdType.
///
/// Behavior per ColdType:
///  - None:    nothing is changed.
///  - OptSize: adds optsize unless the function is already optsize or minsize.
///  - MinSize: adds minsize, replacing an existing optsize.
///  - OptNone: adds optnone + noinline, stripping optsize/minsize.
///
/// Functions that are already optnone are never modified, and alwaysinline
/// functions are never turned into optnone functions.
///
/// \returns PreservedAnalyses::all() if no attribute was changed (including
/// when the module carries no profile summary), PreservedAnalyses::none()
/// otherwise.
PreservedAnalyses MarkColdFunctionsPass::run(Module &M,
                                             ModuleAnalysisManager &AM) {
  if (ColdType == PGOOptions::ColdFuncAttr::None)
    return PreservedAnalyses::all();

  ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  if (!PSI.hasProfileSummary())
    return PreservedAnalyses::all();

  FunctionAnalysisManager &FAM =
      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  bool MadeChange = false;
  for (Function &F : M) {
    if (F.isDeclaration())
      continue;

    // An optnone function is left alone by every mode: optsize/minsize must
    // not be combined with optnone, and for the OptNone mode there is nothing
    // left to add. Checking this first avoids computing block frequencies and
    // reporting a change for a function we cannot modify.
    if (F.hasFnAttribute(Attribute::OptimizeNone))
      continue;

    BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
    if (!PSI.isFunctionColdInCallGraph(&F, BFI))
      continue;

    // Add optsize/minsize/optnone if requested.
    switch (ColdType) {
    case PGOOptions::ColdFuncAttr::None:
      assert(false && "ColdFuncAttr::None is handled before the loop");
      break;
    case PGOOptions::ColdFuncAttr::OptSize:
      // Keep an existing optsize/minsize; minsize is the stronger request.
      if (F.hasFnAttribute(Attribute::OptimizeForSize) ||
          F.hasFnAttribute(Attribute::MinSize))
        break;
      F.addFnAttr(Attribute::OptimizeForSize);
      MadeChange = true;
      break;
    case PGOOptions::ColdFuncAttr::MinSize:
      if (F.hasFnAttribute(Attribute::MinSize))
        break;
      // Change optsize to minsize.
      F.removeFnAttr(Attribute::OptimizeForSize);
      F.addFnAttr(Attribute::MinSize);
      MadeChange = true;
      break;
    case PGOOptions::ColdFuncAttr::OptNone:
      // optnone has to be paired with noinline, which contradicts
      // alwaysinline. Leave alwaysinline functions untouched rather than
      // emitting IR that the verifier rejects.
      if (F.hasFnAttribute(Attribute::AlwaysInline))
        break;
      // Strip optsize/minsize; they cannot be combined with optnone.
      F.removeFnAttr(Attribute::OptimizeForSize);
      F.removeFnAttr(Attribute::MinSize);
      F.addFnAttr(Attribute::OptimizeNone);
      F.addFnAttr(Attribute::NoInline);
      MadeChange = true;
      break;
    }
  }
  return MadeChange ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=none | FileCheck %s --check-prefixes=NONE,CHECK
2+
; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=optsize | FileCheck %s --check-prefixes=OPTSIZE,CHECK
3+
; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=minsize | FileCheck %s --check-prefixes=MINSIZE,CHECK
4+
; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=optnone | FileCheck %s --check-prefixes=OPTNONE,CHECK
5+
6+
; Should be no changes without profile data
7+
; RUN: opt < %s -passes=mark-cold-functions -S -pgo-cold-func-attr=minsize | FileCheck %s --check-prefixes=NONE,CHECK
8+
9+
; NONE-NOT: Function Attrs:
10+
; OPTSIZE: Function Attrs: optsize{{$}}
11+
; MINSIZE: Function Attrs: minsize{{$}}
12+
; OPTNONE: Function Attrs: noinline optnone{{$}}
13+
; CHECK: define void @cold()
14+
15+
; NONE: Function Attrs: optsize{{$}}
16+
; OPTSIZE: Function Attrs: optsize{{$}}
17+
; MINSIZE: Function Attrs: minsize{{$}}
18+
; OPTNONE: Function Attrs: noinline optnone{{$}}
19+
; CHECK-NEXT: define void @cold1()
20+
21+
; NONE: Function Attrs: minsize{{$}}
22+
; OPTSIZE: Function Attrs: minsize{{$}}
23+
; MINSIZE: Function Attrs: minsize{{$}}
24+
; OPTNONE: Function Attrs: noinline optnone{{$}}
25+
; CHECK-NEXT: define void @cold2()
26+
27+
; CHECK: Function Attrs: noinline optnone{{$}}
28+
; CHECK-NEXT: define void @cold3()
29+
30+
; CHECK-NOT: Function Attrs: {{.*}}optsize
31+
; CHECK-NOT: Function Attrs: {{.*}}minsize
32+
; CHECK-NOT: Function Attrs: {{.*}}optnone
33+
34+
@s = global i32 0
35+
36+
define void @cold() !prof !27 {
37+
store i32 1, ptr @s, align 4
38+
ret void
39+
}
40+
41+
define void @cold1() optsize !prof !27 {
42+
store i32 1, ptr @s, align 4
43+
ret void
44+
}
45+
46+
define void @cold2() minsize !prof !27 {
47+
store i32 1, ptr @s, align 4
48+
ret void
49+
}
50+
51+
define void @cold3() noinline optnone !prof !27 {
52+
store i32 1, ptr @s, align 4
53+
ret void
54+
}
55+
56+
define void @hot() !prof !28 {
57+
%l = load i32, ptr @s, align 4
58+
%add = add nsw i32 %l, 4
59+
store i32 %add, ptr @s, align 4
60+
ret void
61+
}
62+
63+
attributes #0 = { optsize }
64+
attributes #1 = { minsize }
65+
attributes #2 = { noinline optnone }
66+
67+
!llvm.module.flags = !{!0}
68+
69+
!0 = !{i32 1, !"ProfileSummary", !1}
70+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
71+
!2 = !{!"ProfileFormat", !"InstrProf"}
72+
!3 = !{!"TotalCount", i64 9040}
73+
!4 = !{!"MaxCount", i64 9000}
74+
!5 = !{!"MaxInternalCount", i64 0}
75+
!6 = !{!"MaxFunctionCount", i64 9000}
76+
!7 = !{!"NumCounts", i64 5}
77+
!8 = !{!"NumFunctions", i64 5}
78+
!9 = !{!"DetailedSummary", !10}
79+
!10 = !{!11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26}
80+
!11 = !{i32 10000, i64 9000, i32 1}
81+
!12 = !{i32 100000, i64 9000, i32 1}
82+
!13 = !{i32 200000, i64 9000, i32 1}
83+
!14 = !{i32 300000, i64 9000, i32 1}
84+
!15 = !{i32 400000, i64 9000, i32 1}
85+
!16 = !{i32 500000, i64 9000, i32 1}
86+
!17 = !{i32 600000, i64 9000, i32 1}
87+
!18 = !{i32 700000, i64 9000, i32 1}
88+
!19 = !{i32 800000, i64 9000, i32 1}
89+
!20 = !{i32 900000, i64 9000, i32 1}
90+
!21 = !{i32 950000, i64 9000, i32 1}
91+
!22 = !{i32 990000, i64 9000, i32 1}
92+
!23 = !{i32 999000, i64 10, i32 5}
93+
!24 = !{i32 999900, i64 10, i32 5}
94+
!25 = !{i32 999990, i64 10, i32 5}
95+
!26 = !{i32 999999, i64 10, i32 5}
96+
!27 = !{!"function_entry_count", i64 10}
97+
!28 = !{!"function_entry_count", i64 9000}

0 commit comments

Comments
 (0)