Skip to content

Commit 9a730d8

Browse files
[memprof] Add IndexedMemProfReader::getMemProfCallerCalleePairs (#115807)
Undrifting the MemProf profile requires two sets of information:

- caller-callee pairs from the profile
- caller-callee pairs from the IR

This patch adds a function to do the former. The latter has been addressed by extractCallsFromIR.

Unfortunately, the current MemProf format does not directly give us the caller-callee pairs from the profile. "struct Frame" just tells us where the call site is -- Caller GUID and line/column numbers; it doesn't tell us what function a given Frame is calling. To extract caller-callee pairs, we need to scan each call stack, look at two adjacent Frames, and extract a caller-callee pair.

Conceptually, we would extract caller-callee pairs with:

  for each MemProfRecord in the profile:
    for each call stack in AllocSites:
      extract caller-callee pairs from adjacent pairs of Frames

However, this is highly inefficient. Obtaining MemProfRecord involves looking up the OnDiskHashTable, allocating several vectors on the heap, and populating fields that are irrelevant to us, such as MIB and CallSites.

This patch adds an efficient way of doing the above. Specifically, we

- go through all IndexedMemProfRecords,
- look at each linear call stack ID, and
- extract caller-callee pairs from each call stack.

The extraction is done by a new class CallerCalleePairExtractor, modified from LinearCallStackIdConverter, which reconstructs a call stack from the radix tree array. For our purposes, we skip the reconstruction and immediately populate the data structure for caller-callee pairs.

The resulting caller-callee pairs are of the type:

  DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs;

which can be passed directly to longestCommonSequence just like the result of extractCallsFromIR.

Further performance optimizations are possible for the new functions in this patch. I'll address those in follow-up patches.
1 parent 627b8f8 commit 9a730d8

File tree

5 files changed

+174
-25
lines changed

5 files changed

+174
-25
lines changed

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,9 @@ class IndexedMemProfReader {
695695

696696
Expected<memprof::MemProfRecord>
697697
getMemProfRecord(const uint64_t FuncNameHash) const;
698+
699+
/// Returns the caller-callee pairs found in the MemProf profile as a map
/// keyed by uint64_t caller GUID; each mapped value is a list of
/// memprof::CallEdgeTy entries (call site location, callee GUID).
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
700+
getMemProfCallerCalleePairs() const;
698701
};
699702

700703
/// Reader for the indexed binary instrprof format.
@@ -793,6 +796,11 @@ class IndexedInstrProfReader : public InstrProfReader {
793796
return MemProfReader.getMemProfRecord(FuncNameHash);
794797
}
795798

799+
/// Returns the caller-callee pairs of the MemProf profile. This is a thin
/// wrapper that forwards to MemProfReader.getMemProfCallerCalleePairs() and
/// returns its result unchanged.
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
800+
getMemProfCallerCalleePairs() {
801+
return MemProfReader.getMemProfCallerCalleePairs();
802+
}
803+
796804
/// Fill Counts with the profile data for the given function name.
797805
Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
798806
std::vector<uint64_t> &Counts);

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,83 @@ struct LinearCallStackIdConverter {
931931
}
932932
};
933933

934+
// A source position described by a LineOffset and a Column. Instances are
// totally ordered (LineOffset first, then Column) so that they can be sorted
// and deduplicated.
struct LineLocation {
  LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {}

  // Lexicographic order on (LineOffset, Column).
  bool operator<(const LineLocation &O) const {
    if (LineOffset != O.LineOffset)
      return LineOffset < O.LineOffset;
    return Column < O.Column;
  }

  bool operator==(const LineLocation &O) const {
    return LineOffset == O.LineOffset && Column == O.Column;
  }

  // Expressed through operator== so the two operators cannot drift apart.
  bool operator!=(const LineLocation &O) const { return !(*this == O); }

  // Packs Column into the upper 32 bits and LineOffset into the lower 32 bits.
  uint64_t getHashCode() const {
    return (static_cast<uint64_t>(Column) << 32) | LineOffset;
  }

  uint32_t LineOffset;
  uint32_t Column;
};

// A pair of a call site location and its corresponding callee GUID.
using CallEdgeTy = std::pair<LineLocation, uint64_t>;
958+
959+
// Used to extract caller-callee pairs from the call stack array. The leaf
960+
// frame is assumed to call a heap allocation function with GUID 0. The
961+
// resulting pairs are accumulated in CallerCalleePairs. Users can take it
962+
// with:
963+
//
964+
// auto Pairs = std::move(Extractor.CallerCalleePairs);
965+
struct CallerCalleePairExtractor {
966+
// The base address of the radix tree array.
967+
const unsigned char *CallStackBase;
968+
// A functor to convert a linear FrameId to a Frame.
969+
std::function<Frame(LinearFrameId)> FrameIdToFrame;
970+
// A map from caller GUIDs to lists of call sites in respective callers.
971+
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> CallerCalleePairs;
972+
973+
CallerCalleePairExtractor() = delete;
974+
CallerCalleePairExtractor(const unsigned char *CallStackBase,
975+
std::function<Frame(LinearFrameId)> FrameIdToFrame)
976+
: CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
977+
978+
void operator()(LinearCallStackId LinearCSId) {
979+
const unsigned char *Ptr =
980+
CallStackBase +
981+
static_cast<uint64_t>(LinearCSId) * sizeof(LinearFrameId);
982+
uint32_t NumFrames =
983+
support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
984+
// The leaf frame calls a function with GUID 0.
985+
uint64_t CalleeGUID = 0;
986+
for (; NumFrames; --NumFrames) {
987+
LinearFrameId Elem =
988+
support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
989+
// Follow a pointer to the parent, if any. See comments below on
990+
// CallStackRadixTreeBuilder for the description of the radix tree format.
991+
if (static_cast<std::make_signed_t<LinearFrameId>>(Elem) < 0) {
992+
Ptr += (-Elem) * sizeof(LinearFrameId);
993+
Elem =
994+
support::endian::read<LinearFrameId, llvm::endianness::little>(Ptr);
995+
}
996+
// We shouldn't encounter another pointer.
997+
assert(static_cast<std::make_signed_t<LinearFrameId>>(Elem) >= 0);
998+
999+
// Add a new caller-callee pair.
1000+
Frame F = FrameIdToFrame(Elem);
1001+
uint64_t CallerGUID = F.Function;
1002+
LineLocation Loc(F.LineOffset, F.Column);
1003+
CallerCalleePairs[CallerGUID].emplace_back(Loc, CalleeGUID);
1004+
1005+
Ptr += sizeof(LinearFrameId);
1006+
CalleeGUID = CallerGUID;
1007+
}
1008+
}
1009+
};
1010+
9341011
struct IndexedMemProfData {
9351012
// A map to hold memprof data per function. The lower 64 bits obtained from
9361013
// the md5 hash of the function name is used to index into the map.

llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "llvm/ADT/IntrusiveRefCntPtr.h"
1616
#include "llvm/IR/PassManager.h"
17+
#include "llvm/ProfileData/MemProf.h"
1718

1819
namespace llvm {
1920
class Function;
@@ -60,31 +61,6 @@ class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
6061

6162
namespace memprof {
6263

63-
// A source position described by a LineOffset and a Column. Instances are
// totally ordered (LineOffset first, then Column) so that they can be sorted
// and deduplicated.
struct LineLocation {
  LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Column(D) {}

  // Lexicographic order on (LineOffset, Column).
  bool operator<(const LineLocation &O) const {
    if (LineOffset != O.LineOffset)
      return LineOffset < O.LineOffset;
    return Column < O.Column;
  }

  bool operator==(const LineLocation &O) const {
    return LineOffset == O.LineOffset && Column == O.Column;
  }

  // Expressed through operator== so the two operators cannot drift apart.
  bool operator!=(const LineLocation &O) const { return !(*this == O); }

  // Packs Column into the upper 32 bits and LineOffset into the lower 32 bits.
  uint64_t getHashCode() const {
    return (static_cast<uint64_t>(Column) << 32) | LineOffset;
  }

  uint32_t LineOffset;
  uint32_t Column;
};

// A pair of a call site location and its corresponding callee GUID.
using CallEdgeTy = std::pair<LineLocation, uint64_t>;
87-
8864
// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a
8965
// list of call sites, each of the form {LineLocation, CalleeGUID}.
9066
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1666,6 +1666,32 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
16661666
memprof::MaximumSupportedVersion));
16671667
}
16681668

1669+
// Builds the caller-callee pairs of the MemProf profile by running a
// CallerCalleePairExtractor over the call stack ID of every allocation site
// of every indexed record. Each per-caller list in the returned map is
// sorted and has duplicate entries removed.
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
IndexedMemProfReader::getMemProfCallerCalleePairs() const {
  assert(MemProfRecordTable);
  assert(Version == memprof::Version3);

  memprof::LinearFrameIdConverter ToFrame(FrameBase);
  memprof::CallerCalleePairExtractor PairExtractor(CallStackBase, ToFrame);

  // Only the AllocSites call stacks are visited.
  for (const memprof::IndexedMemProfRecord &Record :
       MemProfRecordTable->data()) {
    for (const memprof::IndexedAllocationInfo &AllocSite : Record.AllocSites)
      PairExtractor(AllocSite.CSId);
  }

  // Take the accumulated map out of the extractor.
  auto Result = std::move(PairExtractor.CallerCalleePairs);

  // Sort each call list by the source location, then drop adjacent
  // duplicates.
  for (auto &Entry : Result) {
    auto &Calls = Entry.second;
    llvm::sort(Calls);
    Calls.erase(llvm::unique(Calls), Calls.end());
  }

  return Result;
}
1694+
16691695
Error IndexedInstrProfReader::getFunctionCounts(StringRef FuncName,
16701696
uint64_t FuncHash,
16711697
std::vector<uint64_t> &Counts) {

llvm/unittests/ProfileData/InstrProfTest.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,68 @@ TEST_F(InstrProfTest, test_memprof_v2_partial_schema) {
580580
EXPECT_THAT(WantRecord, EqualsRecord(Record));
581581
}
582582

583+
// Writes a Version3 MemProf profile with two allocation call stacks and checks
// the caller-callee pairs read back from it.
TEST_F(InstrProfTest, test_caller_callee_pairs) {
  const MemInfoBlock MIB = makePartialMIB();

  Writer.setMemProfVersionRequested(memprof::Version3);
  Writer.setMemProfFullSchema(false);

  ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
                    Succeeded());

  // Call Hierarchy
  //
  // Function GUID:0x123
  //   Line: 1, Column: 2
  //     Function GUID: 0x234
  //       Line: 3, Column: 4
  //         new(...)
  //   Line: 5, Column: 6
  //     Function GUID: 0x345
  //       Line: 7, Column: 8
  //         new(...)

  const std::pair<memprof::FrameId, memprof::Frame> FrameTable[] = {
      {0, {0x123, 1, 2, false}},
      {1, {0x234, 3, 4, true}},
      {2, {0x123, 5, 6, false}},
      {3, {0x345, 7, 8, true}}};
  for (const auto &Entry : FrameTable)
    Writer.addMemProfFrame(Entry.first, Entry.second, Err);

  // Each call stack lists its frames leaf first.
  const std::pair<memprof::CallStackId, SmallVector<memprof::FrameId>>
      StackTable[] = {{0x111, {1, 0}}, {0x222, {3, 2}}};
  for (const auto &Entry : StackTable)
    Writer.addMemProfCallStack(Entry.first, Entry.second, Err);

  const IndexedMemProfRecord IndexedMR = makeRecordV2(
      /*AllocFrames=*/{0x111, 0x222},
      /*CallSiteFrames=*/{}, MIB, memprof::getHotColdSchema());
  Writer.addMemProfRecord(/*Id=*/0x9999, IndexedMR);

  auto Profile = Writer.writeBuffer();
  readProfile(std::move(Profile));

  auto Pairs = Reader->getMemProfCallerCalleePairs();
  ASSERT_THAT(Pairs, SizeIs(3));

  // 0x123 is the caller at two call sites, one per call stack.
  auto RootIt = Pairs.find(0x123);
  ASSERT_NE(RootIt, Pairs.end());
  ASSERT_THAT(RootIt->second, SizeIs(2));
  EXPECT_THAT(RootIt->second[0],
              testing::Pair(testing::FieldsAre(1U, 2U), 0x234U));
  EXPECT_THAT(RootIt->second[1],
              testing::Pair(testing::FieldsAre(5U, 6U), 0x345U));

  // The leaf frames are paired with callee GUID 0.
  auto FirstLeafIt = Pairs.find(0x234);
  ASSERT_NE(FirstLeafIt, Pairs.end());
  ASSERT_THAT(FirstLeafIt->second, SizeIs(1));
  EXPECT_THAT(FirstLeafIt->second[0],
              testing::Pair(testing::FieldsAre(3U, 4U), 0U));

  auto SecondLeafIt = Pairs.find(0x345);
  ASSERT_NE(SecondLeafIt, Pairs.end());
  ASSERT_THAT(SecondLeafIt->second, SizeIs(1));
  EXPECT_THAT(SecondLeafIt->second[0],
              testing::Pair(testing::FieldsAre(7U, 8U), 0U));
}
644+
583645
TEST_F(InstrProfTest, test_memprof_getrecord_error) {
584646
ASSERT_THAT_ERROR(Writer.mergeProfileKind(InstrProfKind::MemProf),
585647
Succeeded());

0 commit comments

Comments
 (0)