Skip to content

Commit 8f3f93c

Browse files
authored
[SampleFDO] Match functions with the same base function name (#126688)
Sometimes there are no matched anchors even though the functions still match. For example, if a function's template typename changes, every callsite that uses the type is mismatched, and so is each caller function that contains those callsites. Introduce a check that matches two functions when their demangled base names are the same.
1 parent 02ed659 commit 8f3f93c

File tree

4 files changed

+174
-0
lines changed

4 files changed

+174
-0
lines changed

llvm/lib/Transforms/IPO/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ add_llvm_component_library(LLVMipo
6565
FrontendOpenMP
6666
InstCombine
6767
IRReader
68+
Demangle
6869
Linker
6970
Object
7071
ProfileData

llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15+
#include "llvm/Demangle/Demangle.h"
1516
#include "llvm/IR/IntrinsicInst.h"
1617
#include "llvm/IR/MDBuilder.h"
1718
#include "llvm/Support/CommandLine.h"
@@ -727,6 +728,33 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
727728
// two sequences are.
728729
float Similarity = 0.0;
729730

731+
// Match the functions if they have the same base name (after demangling) and
732+
// skip the similarity check.
733+
ItaniumPartialDemangler Demangler;
734+
// Helper lambda to demangle and get the base name. If the demangling failed,
735+
// return an empty string.
736+
auto GetBaseName = [&](StringRef FName) {
737+
auto FunctionName = FName.str();
738+
if (Demangler.partialDemangle(FunctionName.c_str()))
739+
return std::string();
740+
constexpr size_t MaxBaseNameSize = 4096;
741+
char BaseNameBuf[MaxBaseNameSize] = {};
742+
size_t BaseNameSize = MaxBaseNameSize;
743+
char *BaseNamePtr =
744+
Demangler.getFunctionBaseName(BaseNameBuf, &BaseNameSize);
745+
return (BaseNamePtr && BaseNameSize)
746+
? std::string(BaseNamePtr, BaseNameSize)
747+
: std::string();
748+
};
749+
auto IRBaseName = GetBaseName(IRFunc.getName());
750+
auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
751+
if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
752+
LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
753+
<< ProfFunc << "(Profile) share the same base name: "
754+
<< IRBaseName << ".\n");
755+
return true;
756+
}
757+
730758
const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
731759
// With extbinary profile format, initial profile loading only reads profile
732760
// based on current function names in the module.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
main:200:0
2+
1: 0
3+
2: 50
4+
3: 0
5+
4: 50
6+
5: 52 _Z3fooi:52
7+
6: 50
8+
7: 0
9+
!CFGChecksum: 281582264815352
10+
_Z3fooi:52:52
11+
1: 52
12+
!CFGChecksum: 4294967295000
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
; REQUIRES: x86_64-linux
2+
; REQUIRES: asserts
3+
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-name-similarity.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
4+
5+
; CHECK: Function _Z3fool is not in profile or profile symbol list.
6+
; CHECK: Run stale profile matching for main
7+
; CHECK: The functions _Z3fool(IR) and _Z3fooi(Profile) share the same base name: foo
8+
; CHECK: Function:_Z3fool matches profile:_Z3fooi
9+
; CHECK: Run stale profile matching for _Z3fool
10+
11+
12+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
13+
target triple = "x86_64-unknown-linux-gnu"
14+
15+
@x = dso_local global i32 0, align 4, !dbg !0
16+
17+
; Function Attrs: mustprogress noinline nounwind uwtable
18+
define dso_local void @_Z3fool(i64 noundef %y) #0 !dbg !17 {
19+
entry:
20+
#dbg_value(i64 %y, !22, !DIExpression(), !23)
21+
call void @llvm.pseudoprobe(i64 5326982120444056491, i64 1, i32 0, i64 -1), !dbg !24
22+
%0 = load volatile i32, ptr @x, align 4, !dbg !25, !tbaa !26
23+
%conv = sext i32 %0 to i64, !dbg !25
24+
%add = add nsw i64 %conv, %y, !dbg !25
25+
%conv1 = trunc i64 %add to i32, !dbg !25
26+
store volatile i32 %conv1, ptr @x, align 4, !dbg !25, !tbaa !26
27+
ret void, !dbg !30
28+
}
29+
30+
; Function Attrs: mustprogress norecurse nounwind uwtable
31+
define dso_local noundef i32 @main() #1 !dbg !31 {
32+
entry:
33+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !37
34+
#dbg_value(i32 0, !35, !DIExpression(), !38)
35+
br label %for.cond, !dbg !39
36+
37+
for.cond: ; preds = %for.body, %entry
38+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !40
39+
#dbg_value(i32 %i.0, !35, !DIExpression(), !38)
40+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !41
41+
%cmp = icmp slt i32 %i.0, 1000000, !dbg !43
42+
br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !44
43+
44+
for.cond.cleanup: ; preds = %for.cond
45+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !45
46+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !46
47+
ret i32 0, !dbg !46
48+
49+
for.body: ; preds = %for.cond
50+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !47
51+
%conv = sext i32 %i.0 to i64, !dbg !47
52+
call void @_Z3fool(i64 noundef %conv), !dbg !49
53+
call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !51
54+
%inc = add nsw i32 %i.0, 1, !dbg !51
55+
#dbg_value(i32 %inc, !35, !DIExpression(), !38)
56+
br label %for.cond, !dbg !52, !llvm.loop !53
57+
}
58+
59+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
60+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr captures(none)) #2
61+
62+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
63+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr captures(none)) #2
64+
65+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
66+
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
67+
68+
attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
69+
attributes #1 = { mustprogress norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
70+
attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
71+
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
72+
73+
!llvm.dbg.cu = !{!2}
74+
!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
75+
!llvm.ident = !{!14}
76+
!llvm.pseudo_probe_desc = !{!15, !16}
77+
78+
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
79+
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
80+
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 21.0.0git (https://github.com/llvm/llvm-project.git c9f1d2cbf18990311ea1287cc154e3784a10a3b0)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
81+
!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "2991f6c78cef4c393285c97c0f5dabc4")
82+
!4 = !{!0}
83+
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
84+
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
85+
!7 = !{i32 7, !"Dwarf Version", i32 5}
86+
!8 = !{i32 2, !"Debug Info Version", i32 3}
87+
!9 = !{i32 1, !"wchar_size", i32 4}
88+
!10 = !{i32 8, !"PIC Level", i32 2}
89+
!11 = !{i32 7, !"PIE Level", i32 2}
90+
!12 = !{i32 7, !"uwtable", i32 2}
91+
!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
92+
!14 = !{!"clang version 21.0.0git (https://github.com/llvm/llvm-project.git c9f1d2cbf18990311ea1287cc154e3784a10a3b0)"}
93+
!15 = !{i64 5326982120444056491, i64 4294967295, !"_Z3fool"}
94+
!16 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
95+
!17 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !3, file: !3, line: 3, type: !18, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
96+
!18 = !DISubroutineType(types: !19)
97+
!19 = !{null, !20}
98+
!20 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed)
99+
!21 = !{!22}
100+
!22 = !DILocalVariable(name: "y", arg: 1, scope: !17, file: !3, line: 3, type: !20)
101+
!23 = !DILocation(line: 0, scope: !17)
102+
!24 = !DILocation(line: 4, column: 9, scope: !17)
103+
!25 = !DILocation(line: 4, column: 6, scope: !17)
104+
!26 = !{!27, !27, i64 0}
105+
!27 = !{!"int", !28, i64 0}
106+
!28 = !{!"omnipotent char", !29, i64 0}
107+
!29 = !{!"Simple C++ TBAA"}
108+
!30 = !DILocation(line: 5, column: 1, scope: !17)
109+
!31 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 7, type: !32, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !34)
110+
!32 = !DISubroutineType(types: !33)
111+
!33 = !{!6}
112+
!34 = !{!35}
113+
!35 = !DILocalVariable(name: "i", scope: !36, file: !3, line: 8, type: !6)
114+
!36 = distinct !DILexicalBlock(scope: !31, file: !3, line: 8, column: 3)
115+
!37 = !DILocation(line: 8, column: 12, scope: !36)
116+
!38 = !DILocation(line: 0, scope: !36)
117+
!39 = !DILocation(line: 8, column: 8, scope: !36)
118+
!40 = !DILocation(line: 8, scope: !36)
119+
!41 = !DILocation(line: 8, column: 19, scope: !42)
120+
!42 = distinct !DILexicalBlock(scope: !36, file: !3, line: 8, column: 3)
121+
!43 = !DILocation(line: 8, column: 21, scope: !42)
122+
!44 = !DILocation(line: 8, column: 3, scope: !36)
123+
!45 = !DILocation(line: 0, scope: !31)
124+
!46 = !DILocation(line: 11, column: 1, scope: !31)
125+
!47 = !DILocation(line: 9, column: 11, scope: !48)
126+
!48 = distinct !DILexicalBlock(scope: !42, file: !3, line: 8, column: 41)
127+
!49 = !DILocation(line: 9, column: 7, scope: !50)
128+
!50 = !DILexicalBlockFile(scope: !48, file: !3, discriminator: 455082031)
129+
!51 = !DILocation(line: 8, column: 37, scope: !42)
130+
!52 = !DILocation(line: 8, column: 3, scope: !42)
131+
!53 = distinct !{!53, !44, !54, !55}
132+
!54 = !DILocation(line: 10, column: 3, scope: !36)
133+
!55 = !{!"llvm.loop.mustprogress"}

0 commit comments

Comments
 (0)