Skip to content

Commit 34f0edd

Browse files
[TypeProf][PGO]Support skipping vtable comparisons for a class and its derived ones (#110575)
Performance-critical core libraries can be highly optimized for arch or micro-arch features. For instance, the absl crc library specializes different templated classes for different hardware [1]. In a practical setting, it's likely that instrumented profiles are collected on one type of machine and used to optimize binaries that run on multiple types of hardware. While this kind of specialization is rare in terms of lines of code, the compiler can do a better job by skipping vtable-based ICP for such classes. * The per-class `Extend` implementation is arch-specific as well. If an instrumented profile is collected on one arch and applied to another arch where the `Extend` implementation is different, `Extend` might be regarded as an unlikely function in the latter case. The `ABSL_ATTRIBUTE_HOT` annotation alleviates the problem by putting all `Extend` implementations into the hot text section [2]. This change introduces a comma-separated list to specify the mangled vtable names, and the ICP pass will skip vtable-based comparison if a vtable variable definition is shown to be in its class hierarchy (per LLVM type metadata). [1] https://github.com/abseil/abseil-cpp/blob/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L621-L650 [2] https://github.com/abseil/abseil-cpp/blame/c6b27359c3d27438b1313dddd7598914c1274a50/absl/crc/internal/crc_x86_arm_combined.cc#L370C3-L370C21
1 parent 694fd1f commit 34f0edd

File tree

2 files changed

+58
-4
lines changed

2 files changed

+58
-4
lines changed

llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,15 @@ static cl::opt<int> ICPMaxNumVTableLastCandidate(
132132
"icp-max-num-vtable-last-candidate", cl::init(1), cl::Hidden,
133133
cl::desc("The maximum number of vtable for the last candidate."));
134134

135+
static cl::list<std::string> ICPIgnoredBaseTypes(
136+
"icp-ignored-base-types", cl::Hidden,
137+
cl::desc(
138+
"A list of mangled vtable type info names. Classes specified by the "
139+
"type info names and their derived ones will not be vtable-ICP'ed. "
140+
"Useful when the profiled types and actual types in the optimized "
141+
"binary could be different due to profiling limitations. Type info "
142+
"names are those string literals used in LLVM type metadata"));
143+
135144
namespace {
136145

137146
// The key is a vtable global variable, and the value is a map.
@@ -316,6 +325,8 @@ class IndirectCallPromoter {
316325

317326
OptimizationRemarkEmitter &ORE;
318327

328+
const DenseSet<StringRef> &IgnoredBaseTypes;
329+
319330
// A struct that records the direct target and its call count.
320331
struct PromotionCandidate {
321332
Function *const TargetFunction;
@@ -366,6 +377,10 @@ class IndirectCallPromoter {
366377
bool isProfitableToCompareVTables(const CallBase &CB,
367378
ArrayRef<PromotionCandidate> Candidates);
368379

380+
// Return true if the vtable corresponding to VTableGUID should be skipped
381+
// for vtable-based comparison.
382+
bool shouldSkipVTable(uint64_t VTableGUID);
383+
369384
// Given an indirect callsite and the list of function candidates, compute
370385
// the following vtable information in output parameters and return vtable
371386
// pointer if type profiles exist.
@@ -391,10 +406,12 @@ class IndirectCallPromoter {
391406
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
392407
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
393408
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
409+
const DenseSet<StringRef> &IgnoredBaseTypes,
394410
OptimizationRemarkEmitter &ORE)
395411
: F(Func), M(M), Symtab(Symtab), SamplePGO(SamplePGO),
396412
VirtualCSInfo(VirtualCSInfo),
397-
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
413+
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE),
414+
IgnoredBaseTypes(IgnoredBaseTypes) {}
398415
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
399416
IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
400417

@@ -851,9 +868,14 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
851868
LLVM_DEBUG(dbgs() << "\n");
852869

853870
uint64_t CandidateVTableCount = 0;
854-
for (auto &[GUID, Count] : VTableGUIDAndCounts)
871+
872+
for (auto &[GUID, Count] : VTableGUIDAndCounts) {
855873
CandidateVTableCount += Count;
856874

875+
if (shouldSkipVTable(GUID))
876+
return false;
877+
}
878+
857879
if (CandidateVTableCount < Candidate.Count * ICPVTablePercentageThreshold) {
858880
LLVM_DEBUG(
859881
dbgs() << " function count " << Candidate.Count
@@ -883,6 +905,27 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
883905
return true;
884906
}
885907

908+
bool IndirectCallPromoter::shouldSkipVTable(uint64_t VTableGUID) {
909+
if (IgnoredBaseTypes.empty())
910+
return false;
911+
912+
auto *VTableVar = Symtab->getGlobalVariable(VTableGUID);
913+
914+
assert(VTableVar && "VTableVar must exist for GUID in VTableGUIDAndCounts");
915+
916+
SmallVector<MDNode *, 2> Types;
917+
VTableVar->getMetadata(LLVMContext::MD_type, Types);
918+
919+
for (auto *Type : Types)
920+
if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get()))
921+
if (IgnoredBaseTypes.contains(TypeId->getString())) {
922+
LLVM_DEBUG(dbgs() << " vtable profiles should be ignored. Bail "
923+
"out of vtable comparison.");
924+
return true;
925+
}
926+
return false;
927+
}
928+
886929
// For virtual calls in the module, collect per-callsite information which will
887930
// be used to associate an ICP candidate with a vtable and a specific function
888931
// in the vtable. With type intrinsics (llvm.type.test), we can find virtual
@@ -956,9 +999,15 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
956999
bool Changed = false;
9571000
VirtualCallSiteTypeInfoMap VirtualCSInfo;
9581001

959-
if (EnableVTableProfileUse)
1002+
DenseSet<StringRef> IgnoredBaseTypes;
1003+
1004+
if (EnableVTableProfileUse) {
9601005
computeVirtualCallSiteTypeInfoMap(M, MAM, VirtualCSInfo);
9611006

1007+
for (StringRef Str : ICPIgnoredBaseTypes)
1008+
IgnoredBaseTypes.insert(Str);
1009+
}
1010+
9621011
// VTableAddressPointOffsetVal stores the vtable address points. The vtable
9631012
// address point of a given <vtable, address point offset> is static (doesn't
9641013
// change after being computed once).
@@ -977,7 +1026,8 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
9771026
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
9781027

9791028
IndirectCallPromoter CallPromoter(F, M, &Symtab, SamplePGO, VirtualCSInfo,
980-
VTableAddressPointOffsetVal, ORE);
1029+
VTableAddressPointOffsetVal,
1030+
IgnoredBaseTypes, ORE);
9811031
bool FuncChanged = CallPromoter.processFunction(PSI);
9821032
if (ICPDUMPAFTER && FuncChanged) {
9831033
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));

llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22

3+
; Tests that ICP compares vtables by checking IR.
34
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
5+
; Require exactly one vtable candidate for each function candidate. Tests that ICP compares functions by checking IR.
46
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
7+
; On top of line 4, ignore 'Base1' and its derived types for vtable-based comparison. Tests that ICP compares functions.
8+
; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=2 -icp-ignored-base-types='Base1' -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
59

610
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
711
target triple = "x86_64-unknown-linux-gnu"

0 commit comments

Comments
 (0)