Skip to content

Commit 90acfbf

Browse files
[memprof] Use linear IDs for Frames and call stacks (#93740)
With this patch, we stop using on-disk hash tables for Frames and call stacks. Instead, we'll write out all the Frames as a flat array while maintaining mappings from FrameIds to the indexes into the array. Then we serialize call stacks in terms of those indexes. Likewise, we'll write out all the call stacks as another flat array while maintaining mappings from CallStackIds to the indexes into the call stack array. One minor difference from Frames is that the indexes into the call stack array are not contiguous because call stacks are variable-length objects. Then we serialize IndexedMemProfRecords in terms of the indexes into the call stack array. Now, we describe each call stack with 32-bit indexes into the Frame array (as opposed to the 64-bit FrameIds in Version 2). The use of the smaller type cuts down the profile file size by about 40% relative to Version 2. The departure from the on-disk hash tables contributes a little bit to the savings, too. For now, IndexedMemProfRecords refer to call stacks with 64-bit indexes into the call stack array. As a follow-up, I'll change that to uint32_t, including necessary updates to RecordWriterTrait.
1 parent d5f077c commit 90acfbf

File tree

5 files changed

+185
-37
lines changed

5 files changed

+185
-37
lines changed

llvm/include/llvm/ProfileData/InstrProfReader.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,10 @@ class IndexedMemProfReader {
659659
std::unique_ptr<MemProfFrameHashTable> MemProfFrameTable;
660660
/// MemProf call stack data on-disk indexed via call stack id.
661661
std::unique_ptr<MemProfCallStackHashTable> MemProfCallStackTable;
662+
/// The starting address of the frame array.
663+
const unsigned char *FrameBase = nullptr;
664+
/// The starting address of the call stack array.
665+
const unsigned char *CallStackBase = nullptr;
662666

663667
Error deserializeV012(const unsigned char *Start, const unsigned char *Ptr,
664668
uint64_t FirstWord, memprof::IndexedVersion Version);

llvm/include/llvm/ProfileData/MemProf.h

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,9 @@ struct IndexedMemProfRecord {
418418
// Serializes the memprof records in \p Records to the ostream \p OS based
419419
// on the schema provided in \p Schema.
420420
void serialize(const MemProfSchema &Schema, raw_ostream &OS,
421-
IndexedVersion Version);
421+
IndexedVersion Version,
422+
llvm::DenseMap<memprof::CallStackId, uint32_t>
423+
*MemProfCallStackIndexes = nullptr);
422424

423425
// Deserializes memprof records from the Buffer.
424426
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema,
@@ -557,11 +559,17 @@ class RecordWriterTrait {
557559
// The MemProf version to use for the serialization.
558560
IndexedVersion Version;
559561

562+
// Mappings from CallStackId to the indexes into the call stack array.
563+
llvm::DenseMap<memprof::CallStackId, uint32_t> *MemProfCallStackIndexes;
564+
560565
public:
561566
// We do not support the default constructor, which does not set Version.
562567
RecordWriterTrait() = delete;
563-
RecordWriterTrait(const MemProfSchema *Schema, IndexedVersion V)
564-
: Schema(Schema), Version(V) {}
568+
RecordWriterTrait(
569+
const MemProfSchema *Schema, IndexedVersion V,
570+
llvm::DenseMap<memprof::CallStackId, uint32_t> *MemProfCallStackIndexes)
571+
: Schema(Schema), Version(V),
572+
MemProfCallStackIndexes(MemProfCallStackIndexes) {}
565573

566574
static hash_value_type ComputeHash(key_type_ref K) { return K; }
567575

@@ -586,7 +594,7 @@ class RecordWriterTrait {
586594
void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V,
587595
offset_type /*Unused*/) {
588596
assert(Schema != nullptr && "MemProf schema is not initialized!");
589-
V.serialize(*Schema, Out, Version);
597+
V.serialize(*Schema, Out, Version, MemProfCallStackIndexes);
590598
// Clear the IndexedMemProfRecord which results in clearing/freeing its
591599
// vectors of allocs and callsites. This is owned by the associated on-disk
592600
// hash table, but unused after this point. See also the comment added to
@@ -835,6 +843,50 @@ template <typename MapTy> struct CallStackIdConverter {
835843
}
836844
};
837845

846+
// A function object that returns a Frame stored at a given index into the Frame
847+
// array in the profile.
848+
struct LinearFrameIdConverter {
849+
const unsigned char *FrameBase;
850+
851+
LinearFrameIdConverter() = delete;
852+
LinearFrameIdConverter(const unsigned char *FrameBase)
853+
: FrameBase(FrameBase) {}
854+
855+
Frame operator()(uint32_t LinearId) {
856+
uint64_t Offset = static_cast<uint64_t>(LinearId) * Frame::serializedSize();
857+
return Frame::deserialize(FrameBase + Offset);
858+
}
859+
};
860+
861+
// A function object that returns a call stack stored at a given index into the
862+
// call stack array in the profile.
863+
struct LinearCallStackIdConverter {
864+
const unsigned char *CallStackBase;
865+
std::function<Frame(uint32_t)> FrameIdToFrame;
866+
867+
LinearCallStackIdConverter() = delete;
868+
LinearCallStackIdConverter(const unsigned char *CallStackBase,
869+
std::function<Frame(uint32_t)> FrameIdToFrame)
870+
: CallStackBase(CallStackBase), FrameIdToFrame(FrameIdToFrame) {}
871+
872+
llvm::SmallVector<Frame> operator()(uint32_t LinearCSId) {
873+
llvm::SmallVector<Frame> Frames;
874+
875+
const unsigned char *Ptr =
876+
CallStackBase + static_cast<uint64_t>(LinearCSId) * sizeof(uint32_t);
877+
uint32_t NumFrames =
878+
support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
879+
Frames.reserve(NumFrames);
880+
for (; NumFrames; --NumFrames) {
881+
uint32_t Elem =
882+
support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
883+
Frames.push_back(FrameIdToFrame(Elem));
884+
}
885+
886+
return Frames;
887+
}
888+
};
889+
838890
struct IndexedMemProfData {
839891
// A map to hold memprof data per function. The lower 64 bits obtained from
840892
// the md5 hash of the function name is used to index into the map.

llvm/lib/ProfileData/InstrProfReader.cpp

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,16 +1261,10 @@ Error IndexedMemProfReader::deserializeV012(const unsigned char *Start,
12611261
Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
12621262
const unsigned char *Ptr,
12631263
memprof::IndexedVersion Version) {
1264-
// The value returned from FrameTableGenerator.Emit.
1265-
const uint64_t FrameTableOffset =
1266-
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
12671264
// The offset in the stream right before the call stack array is written
12681265
// (Version3 no longer emits call stacks via CallStackTableGenerator).
12691266
const uint64_t CallStackPayloadOffset =
12701267
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
1271-
// The value returned from CallStackTableGenerator.Emit.
1272-
const uint64_t CallStackTableOffset =
1273-
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
12741268
// The offset in the stream right before invoking RecordTableGenerator.Emit.
12751269
const uint64_t RecordPayloadOffset =
12761270
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
@@ -1284,16 +1278,8 @@ Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
12841278
return SchemaOr.takeError();
12851279
Schema = SchemaOr.get();
12861280

1287-
// Initialize the frame table reader with the payload and bucket offsets.
1288-
MemProfFrameTable.reset(MemProfFrameHashTable::Create(
1289-
/*Buckets=*/Start + FrameTableOffset,
1290-
/*Payload=*/Ptr,
1291-
/*Base=*/Start));
1292-
1293-
MemProfCallStackTable.reset(MemProfCallStackHashTable::Create(
1294-
/*Buckets=*/Start + CallStackTableOffset,
1295-
/*Payload=*/Start + CallStackPayloadOffset,
1296-
/*Base=*/Start));
1281+
FrameBase = Ptr;
1282+
CallStackBase = Start + CallStackPayloadOffset;
12971283

12981284
// Now initialize the table reader with a pointer into data buffer.
12991285
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
@@ -1605,6 +1591,16 @@ getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
16051591
return Record;
16061592
}
16071593

1594+
// Materializes a MemProfRecord from a Version3 IndexedMemProfRecord. Call
// stacks are resolved through the linear call stack array at CallStackBase,
// whose entries are in turn resolved through the linear frame array at
// FrameBase.
static Expected<memprof::MemProfRecord>
getMemProfRecordV3(const memprof::IndexedMemProfRecord &IndexedRecord,
                   const unsigned char *FrameBase,
                   const unsigned char *CallStackBase) {
  // The call stack converter owns a copy of the frame converter.
  memprof::LinearCallStackIdConverter ToCallStack(
      CallStackBase, memprof::LinearFrameIdConverter(FrameBase));
  return IndexedRecord.toMemProfRecord(ToCallStack);
}
1603+
16081604
Expected<memprof::MemProfRecord>
16091605
IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
16101606
// TODO: Add memprof specific errors.
@@ -1626,11 +1622,17 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
16261622
"MemProfCallStackTable must not be available");
16271623
return getMemProfRecordV0(IndexedRecord, *MemProfFrameTable);
16281624
case memprof::Version2:
1629-
case memprof::Version3:
16301625
assert(MemProfFrameTable && "MemProfFrameTable must be available");
16311626
assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
16321627
return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
16331628
*MemProfCallStackTable);
1629+
case memprof::Version3:
1630+
assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
1631+
assert(!MemProfCallStackTable &&
1632+
"MemProfCallStackTable must not be available");
1633+
assert(FrameBase && "FrameBase must be available");
1634+
assert(CallStackBase && "CallStackBase must be available");
1635+
return getMemProfRecordV3(IndexedRecord, FrameBase, CallStackBase);
16341636
}
16351637

16361638
return make_error<InstrProfError>(

llvm/lib/ProfileData/InstrProfWriter.cpp

Lines changed: 77 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class ProfOStream {
5757

5858
uint64_t tell() { return OS.tell(); }
5959
void write(uint64_t V) { LE.write<uint64_t>(V); }
60+
// Writes V as a 32-bit value through LE (presumably the little-endian writer
// used by the sibling write() methods -- confirm against the class definition).
void write32(uint32_t V) { LE.write<uint32_t>(V); }
6061
void writeByte(uint8_t V) { LE.write<uint8_t>(V); }
6162

6263
// \c patch can only be called when all data is written and flushed.
@@ -452,8 +453,11 @@ static uint64_t writeMemProfRecords(
452453
ProfOStream &OS,
453454
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
454455
&MemProfRecordData,
455-
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) {
456-
memprof::RecordWriterTrait RecordWriter(Schema, Version);
456+
memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
457+
llvm::DenseMap<memprof::CallStackId, uint32_t> *MemProfCallStackIndexes =
458+
nullptr) {
459+
memprof::RecordWriterTrait RecordWriter(Schema, Version,
460+
MemProfCallStackIndexes);
457461
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
458462
RecordTableGenerator;
459463
for (auto &[GUID, Record] : MemProfRecordData) {
@@ -485,6 +489,39 @@ static uint64_t writeMemProfFrames(
485489
return FrameTableGenerator.Emit(OS.OS);
486490
}
487491

492+
// Serialize MemProfFrameData. Return the mapping from FrameIds to their
493+
// indexes within the frame array.
494+
static llvm::DenseMap<memprof::FrameId, uint32_t> writeMemProfFrameArray(
495+
ProfOStream &OS,
496+
llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
497+
// Mappings from FrameIds to array indexes.
498+
llvm::DenseMap<memprof::FrameId, uint32_t> MemProfFrameIndexes;
499+
500+
// Sort the FrameIDs for stability.
501+
std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
502+
FrameIdOrder.reserve(MemProfFrameData.size());
503+
for (const auto &[Id, Frame] : MemProfFrameData)
504+
FrameIdOrder.emplace_back(Id, &Frame);
505+
assert(MemProfFrameData.size() == FrameIdOrder.size());
506+
llvm::sort(FrameIdOrder);
507+
508+
// Serialize all frames while creating mappings from linear IDs to FrameIds.
509+
uint64_t Index = 0;
510+
MemProfFrameIndexes.reserve(FrameIdOrder.size());
511+
for (const auto &[Id, F] : FrameIdOrder) {
512+
F->serialize(OS.OS);
513+
MemProfFrameIndexes.insert({Id, Index});
514+
++Index;
515+
}
516+
assert(MemProfFrameData.size() == Index);
517+
assert(MemProfFrameData.size() == MemProfFrameIndexes.size());
518+
519+
// Release the memory of this MapVector as it is no longer needed.
520+
MemProfFrameData.clear();
521+
522+
return MemProfFrameIndexes;
523+
}
524+
488525
static uint64_t writeMemProfCallStacks(
489526
ProfOStream &OS,
490527
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
@@ -499,6 +536,33 @@ static uint64_t writeMemProfCallStacks(
499536
return CallStackTableGenerator.Emit(OS.OS);
500537
}
501538

539+
// Serialize MemProfCallStackData as a flat array of call stacks. Each call
// stack is written as a 32-bit frame count followed by that many 32-bit
// indexes taken from MemProfFrameIndexes. Returns the mapping from each
// CallStackId to the position of its entry, measured in uint32_t words from the
// start of the array. MemProfCallStackData is cleared before returning.
static llvm::DenseMap<memprof::CallStackId, uint32_t>
writeMemProfCallStackArray(
    ProfOStream &OS,
    llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
        &MemProfCallStackData,
    llvm::DenseMap<memprof::FrameId, uint32_t> &MemProfFrameIndexes) {
  llvm::DenseMap<memprof::CallStackId, uint32_t> MemProfCallStackIndexes;

  MemProfCallStackIndexes.reserve(MemProfCallStackData.size());
  const uint64_t CallStackBase = OS.tell();
  for (const auto &[CSId, CallStack] : MemProfCallStackData) {
    // Entries are variable-length, so the index is derived from the stream
    // position rather than from a running counter.
    const uint64_t CallStackIndex =
        (OS.tell() - CallStackBase) / sizeof(uint32_t);
    assert(static_cast<uint32_t>(CallStackIndex) == CallStackIndex &&
           "Call stack array too large for 32-bit indexes");
    MemProfCallStackIndexes.insert(
        {CSId, static_cast<uint32_t>(CallStackIndex)});

    // Iterate the stored call stack in place; no copy is needed.
    assert(static_cast<uint32_t>(CallStack.size()) == CallStack.size() &&
           "Call stack too deep for a 32-bit frame count");
    OS.write32(static_cast<uint32_t>(CallStack.size()));
    for (const memprof::FrameId F : CallStack) {
      assert(MemProfFrameIndexes.contains(F));
      // lookup() never inserts, so a missing FrameId cannot grow the map.
      OS.write32(MemProfFrameIndexes.lookup(F));
    }
  }

  // Release the memory of this MapVector as it is no longer needed.
  MemProfCallStackData.clear();

  return MemProfCallStackIndexes;
}
502566
// Write out MemProf Version0 as follows:
503567
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
504568
// uint64_t FramePayloadOffset = Offset for the frame payload
@@ -619,9 +683,7 @@ static Error writeMemProfV2(ProfOStream &OS,
619683

620684
// Write out MemProf Version3 as follows:
621685
// uint64_t Version
622-
// uint64_t FrameTableOffset = FrameTableGenerator.Emit
623686
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
624-
// uint64_t CallStackTableOffset = CallStackTableGenerator.Emit
625687
// uint64_t RecordPayloadOffset = Offset for the record payload
626688
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
627689
// uint64_t Num schema entries
@@ -637,9 +699,7 @@ static Error writeMemProfV3(ProfOStream &OS,
637699
bool MemProfFullSchema) {
638700
OS.write(memprof::Version3);
639701
uint64_t HeaderUpdatePos = OS.tell();
640-
OS.write(0ULL); // Reserve space for the memprof frame table offset.
641702
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
642-
OS.write(0ULL); // Reserve space for the memprof call stack table offset.
643703
OS.write(0ULL); // Reserve space for the memprof record payload offset.
644704
OS.write(0ULL); // Reserve space for the memprof record table offset.
645705

@@ -648,19 +708,23 @@ static Error writeMemProfV3(ProfOStream &OS,
648708
Schema = memprof::getFullSchema();
649709
writeMemProfSchema(OS, Schema);
650710

651-
uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfData.FrameData);
711+
llvm::DenseMap<memprof::FrameId, uint32_t> MemProfFrameIndexes =
712+
writeMemProfFrameArray(OS, MemProfData.FrameData);
652713

653714
uint64_t CallStackPayloadOffset = OS.tell();
654-
uint64_t CallStackTableOffset =
655-
writeMemProfCallStacks(OS, MemProfData.CallStackData);
715+
llvm::DenseMap<memprof::CallStackId, uint32_t> MemProfCallStackIndexes =
716+
writeMemProfCallStackArray(OS, MemProfData.CallStackData,
717+
MemProfFrameIndexes);
656718

657719
uint64_t RecordPayloadOffset = OS.tell();
658-
uint64_t RecordTableOffset = writeMemProfRecords(OS, MemProfData.RecordData,
659-
&Schema, memprof::Version3);
720+
uint64_t RecordTableOffset =
721+
writeMemProfRecords(OS, MemProfData.RecordData, &Schema,
722+
memprof::Version3, &MemProfCallStackIndexes);
660723

661724
uint64_t Header[] = {
662-
FrameTableOffset, CallStackPayloadOffset, CallStackTableOffset,
663-
RecordPayloadOffset, RecordTableOffset,
725+
CallStackPayloadOffset,
726+
RecordPayloadOffset,
727+
RecordTableOffset,
664728
};
665729
OS.patch({{HeaderUpdatePos, Header, std::size(Header)}});
666730

llvm/lib/ProfileData/MemProf.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,17 +143,43 @@ static void serializeV2(const IndexedMemProfRecord &Record,
143143
LE.write<CallStackId>(CSId);
144144
}
145145

146-
void IndexedMemProfRecord::serialize(const MemProfSchema &Schema,
147-
raw_ostream &OS, IndexedVersion Version) {
146+
static void
147+
serializeV3(const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
148+
raw_ostream &OS,
149+
llvm::DenseMap<CallStackId, uint32_t> &MemProfCallStackIndexes) {
150+
using namespace support;
151+
152+
endian::Writer LE(OS, llvm::endianness::little);
153+
154+
LE.write<uint64_t>(Record.AllocSites.size());
155+
for (const IndexedAllocationInfo &N : Record.AllocSites) {
156+
assert(MemProfCallStackIndexes.contains(N.CSId));
157+
LE.write<uint64_t>(MemProfCallStackIndexes[N.CSId]);
158+
N.Info.serialize(Schema, OS);
159+
}
160+
161+
// Related contexts.
162+
LE.write<uint64_t>(Record.CallSiteIds.size());
163+
for (const auto &CSId : Record.CallSiteIds) {
164+
assert(MemProfCallStackIndexes.contains(CSId));
165+
LE.write<uint64_t>(MemProfCallStackIndexes[CSId]);
166+
}
167+
}
168+
169+
void IndexedMemProfRecord::serialize(
170+
const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
171+
llvm::DenseMap<CallStackId, uint32_t> *MemProfCallStackIndexes) {
148172
switch (Version) {
149173
case Version0:
150174
case Version1:
151175
serializeV0(*this, Schema, OS);
152176
return;
153177
case Version2:
154-
case Version3:
155178
serializeV2(*this, Schema, OS);
156179
return;
180+
case Version3:
181+
serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
182+
return;
157183
}
158184
llvm_unreachable("unsupported MemProf version");
159185
}

0 commit comments

Comments
 (0)