@@ -57,6 +57,7 @@ class ProfOStream {
57
57
58
58
// Report the position given by the wrapped stream's own tell().
uint64_t tell() {
  return OS.tell();
}
59
59
// Emit V as a 64-bit value through the LE writer member.
void write(uint64_t V) {
  LE.write<uint64_t>(V);
}
60
+ void write32 (uint32_t V) { LE.write <uint32_t >(V); }
60
61
// Emit V as a single byte through the LE writer member.
void writeByte(uint8_t V) {
  LE.write<uint8_t>(V);
}
61
62
62
63
// \c patch can only be called when all data is written and flushed.
@@ -452,8 +453,11 @@ static uint64_t writeMemProfRecords(
452
453
ProfOStream &OS,
453
454
llvm::MapVector<GlobalValue::GUID, memprof::IndexedMemProfRecord>
454
455
&MemProfRecordData,
455
- memprof::MemProfSchema *Schema, memprof::IndexedVersion Version) {
456
- memprof::RecordWriterTrait RecordWriter (Schema, Version);
456
+ memprof::MemProfSchema *Schema, memprof::IndexedVersion Version,
457
+ llvm::DenseMap<memprof::CallStackId, uint32_t > *MemProfCallStackIndexes =
458
+ nullptr ) {
459
+ memprof::RecordWriterTrait RecordWriter (Schema, Version,
460
+ MemProfCallStackIndexes);
457
461
OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait>
458
462
RecordTableGenerator;
459
463
for (auto &[GUID, Record] : MemProfRecordData) {
@@ -485,6 +489,39 @@ static uint64_t writeMemProfFrames(
485
489
return FrameTableGenerator.Emit (OS.OS );
486
490
}
487
491
492
+ // Serialize MemProfFrameData. Return the mapping from FrameIds to their
493
+ // indexes within the frame array.
494
+ static llvm::DenseMap<memprof::FrameId, uint32_t > writeMemProfFrameArray (
495
+ ProfOStream &OS,
496
+ llvm::MapVector<memprof::FrameId, memprof::Frame> &MemProfFrameData) {
497
+ // Mappings from FrameIds to array indexes.
498
+ llvm::DenseMap<memprof::FrameId, uint32_t > MemProfFrameIndexes;
499
+
500
+ // Sort the FrameIDs for stability.
501
+ std::vector<std::pair<memprof::FrameId, const memprof::Frame *>> FrameIdOrder;
502
+ FrameIdOrder.reserve (MemProfFrameData.size ());
503
+ for (const auto &[Id, Frame] : MemProfFrameData)
504
+ FrameIdOrder.emplace_back (Id, &Frame);
505
+ assert (MemProfFrameData.size () == FrameIdOrder.size ());
506
+ llvm::sort (FrameIdOrder);
507
+
508
+ // Serialize all frames while creating mappings from linear IDs to FrameIds.
509
+ uint64_t Index = 0 ;
510
+ MemProfFrameIndexes.reserve (FrameIdOrder.size ());
511
+ for (const auto &[Id, F] : FrameIdOrder) {
512
+ F->serialize (OS.OS );
513
+ MemProfFrameIndexes.insert ({Id, Index});
514
+ ++Index;
515
+ }
516
+ assert (MemProfFrameData.size () == Index);
517
+ assert (MemProfFrameData.size () == MemProfFrameIndexes.size ());
518
+
519
+ // Release the memory of this MapVector as it is no longer needed.
520
+ MemProfFrameData.clear ();
521
+
522
+ return MemProfFrameIndexes;
523
+ }
524
+
488
525
static uint64_t writeMemProfCallStacks (
489
526
ProfOStream &OS,
490
527
llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
@@ -499,6 +536,33 @@ static uint64_t writeMemProfCallStacks(
499
536
return CallStackTableGenerator.Emit (OS.OS );
500
537
}
501
538
539
+ static llvm::DenseMap<memprof::CallStackId, uint32_t >
540
+ writeMemProfCallStackArray (
541
+ ProfOStream &OS,
542
+ llvm::MapVector<memprof::CallStackId, llvm::SmallVector<memprof::FrameId>>
543
+ &MemProfCallStackData,
544
+ llvm::DenseMap<memprof::FrameId, uint32_t > &MemProfFrameIndexes) {
545
+ llvm::DenseMap<memprof::CallStackId, uint32_t > MemProfCallStackIndexes;
546
+
547
+ MemProfCallStackIndexes.reserve (MemProfCallStackData.size ());
548
+ uint64_t CallStackBase = OS.tell ();
549
+ for (const auto &[CSId, CallStack] : MemProfCallStackData) {
550
+ uint64_t CallStackIndex = (OS.tell () - CallStackBase) / sizeof (uint32_t );
551
+ MemProfCallStackIndexes.insert ({CSId, CallStackIndex});
552
+ const llvm::SmallVector<memprof::FrameId> CS = CallStack;
553
+ OS.write32 (CS.size ());
554
+ for (const auto F : CS) {
555
+ assert (MemProfFrameIndexes.contains (F));
556
+ OS.write32 (MemProfFrameIndexes[F]);
557
+ }
558
+ }
559
+
560
+ // Release the memory of this vector as it is no longer needed.
561
+ MemProfCallStackData.clear ();
562
+
563
+ return MemProfCallStackIndexes;
564
+ }
565
+
502
566
// Write out MemProf Version0 as follows:
503
567
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
504
568
// uint64_t FramePayloadOffset = Offset for the frame payload
@@ -619,9 +683,7 @@ static Error writeMemProfV2(ProfOStream &OS,
619
683
620
684
// Write out MemProf Version3 as follows:
621
685
// uint64_t Version
622
- // uint64_t FrameTableOffset = FrameTableGenerator.Emit
623
686
// uint64_t CallStackPayloadOffset = Offset for the call stack payload
624
- // uint64_t CallStackTableOffset = CallStackTableGenerator.Emit
625
687
// uint64_t RecordPayloadOffset = Offset for the record payload
626
688
// uint64_t RecordTableOffset = RecordTableGenerator.Emit
627
689
// uint64_t Num schema entries
@@ -637,9 +699,7 @@ static Error writeMemProfV3(ProfOStream &OS,
637
699
bool MemProfFullSchema) {
638
700
OS.write (memprof::Version3);
639
701
uint64_t HeaderUpdatePos = OS.tell ();
640
- OS.write (0ULL ); // Reserve space for the memprof frame table offset.
641
702
OS.write (0ULL ); // Reserve space for the memprof call stack payload offset.
642
- OS.write (0ULL ); // Reserve space for the memprof call stack table offset.
643
703
OS.write (0ULL ); // Reserve space for the memprof record payload offset.
644
704
OS.write (0ULL ); // Reserve space for the memprof record table offset.
645
705
@@ -648,19 +708,23 @@ static Error writeMemProfV3(ProfOStream &OS,
648
708
Schema = memprof::getFullSchema ();
649
709
writeMemProfSchema (OS, Schema);
650
710
651
- uint64_t FrameTableOffset = writeMemProfFrames (OS, MemProfData.FrameData );
711
+ llvm::DenseMap<memprof::FrameId, uint32_t > MemProfFrameIndexes =
712
+ writeMemProfFrameArray (OS, MemProfData.FrameData );
652
713
653
714
uint64_t CallStackPayloadOffset = OS.tell ();
654
- uint64_t CallStackTableOffset =
655
- writeMemProfCallStacks (OS, MemProfData.CallStackData );
715
+ llvm::DenseMap<memprof::CallStackId, uint32_t > MemProfCallStackIndexes =
716
+ writeMemProfCallStackArray (OS, MemProfData.CallStackData ,
717
+ MemProfFrameIndexes);
656
718
657
719
uint64_t RecordPayloadOffset = OS.tell ();
658
- uint64_t RecordTableOffset = writeMemProfRecords (OS, MemProfData.RecordData ,
659
- &Schema, memprof::Version3);
720
+ uint64_t RecordTableOffset =
721
+ writeMemProfRecords (OS, MemProfData.RecordData , &Schema,
722
+ memprof::Version3, &MemProfCallStackIndexes);
660
723
661
724
uint64_t Header[] = {
662
- FrameTableOffset, CallStackPayloadOffset, CallStackTableOffset,
663
- RecordPayloadOffset, RecordTableOffset,
725
+ CallStackPayloadOffset,
726
+ RecordPayloadOffset,
727
+ RecordTableOffset,
664
728
};
665
729
OS.patch ({{HeaderUpdatePos, Header, std::size (Header)}});
666
730
0 commit comments