Skip to content

Commit 8f31c6d

Browse files
authored
[BOLT] Support profile density with basic samples (#137644)
For profiles with LBR samples, binary function profile density is computed as the ratio of executed bytes to function size in bytes. For profiles with IP samples, use the size of the basic block containing the sample IP as the numerator. Test Plan: updated perf_test.test
1 parent 066bc49 commit 8f31c6d

File tree

4 files changed

+26
-8
lines changed

4 files changed

+26
-8
lines changed

bolt/include/bolt/Profile/DataReader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,9 @@ struct FuncSampleData {
246246
/// Get the number of samples recorded in [Start, End)
247247
uint64_t getSamples(uint64_t Start, uint64_t End) const;
248248

249+
/// Returns the total number of samples recorded in this function.
250+
uint64_t getSamples() const;
251+
249252
/// Aggregation helper
250253
DenseMap<uint64_t, size_t> Index;
251254

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -565,15 +565,14 @@ void DataAggregator::processProfile(BinaryContext &BC) {
565565
processMemEvents();
566566

567567
// Mark all functions with registered events as having a valid profile.
568-
const auto Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
569-
: BinaryFunction::PF_LBR;
570568
for (auto &BFI : BC.getBinaryFunctions()) {
571569
BinaryFunction &BF = BFI.second;
572-
FuncBranchData *FBD = getBranchData(BF);
573-
if (FBD || getFuncSampleData(BF.getNames())) {
574-
BF.markProfiled(Flags);
575-
if (FBD)
576-
BF.RawBranchCount = FBD->getNumExecutedBranches();
570+
if (FuncBranchData *FBD = getBranchData(BF)) {
571+
BF.markProfiled(BinaryFunction::PF_LBR);
572+
BF.RawBranchCount = FBD->getNumExecutedBranches();
573+
} else if (FuncSampleData *FSD = getFuncSampleData(BF.getNames())) {
574+
BF.markProfiled(BinaryFunction::PF_SAMPLE);
575+
BF.RawBranchCount = FSD->getSamples();
577576
}
578577
}
579578

@@ -630,10 +629,18 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
630629

631630
bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
632631
uint64_t Count) {
632+
// To record executed bytes, use basic block size as is regardless of BAT.
633+
uint64_t BlockSize = 0;
634+
if (BinaryBasicBlock *BB = OrigFunc.getBasicBlockContainingOffset(
635+
Address - OrigFunc.getAddress()))
636+
BlockSize = BB->getOriginalSize();
637+
633638
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
634639
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
635-
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
640+
if (ParentFunc || (BAT && !BAT->isBATFunction(Func.getAddress())))
636641
NumColdSamples += Count;
642+
// Attach executed bytes to parent function in case of cold fragment.
643+
Func.SampleCountInBytes += Count * BlockSize;
637644

638645
auto I = NamesToSamples.find(Func.getOneName());
639646
if (I == NamesToSamples.end()) {

bolt/lib/Profile/DataReader.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,13 @@ uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const {
128128
return Result;
129129
}
130130

131+
uint64_t FuncSampleData::getSamples() const {
132+
uint64_t Result = 0;
133+
for (const SampleInfo &I : Data)
134+
Result += I.Hits;
135+
return Result;
136+
}
137+
131138
void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) {
132139
auto Iter = Index.find(Offset);
133140
if (Iter == Index.end()) {

bolt/test/perf2bolt/perf_test.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s
88

99
CHECK-NOT: PERF2BOLT-ERROR
1010
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
11+
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
1112

1213
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
1314
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4

0 commit comments

Comments
 (0)