Skip to content

Commit 49d48c3

Browse files
[MemProf] Emit remarks when hinting allocations not needing cloning (#141859)
The context disambiguation code already emits remarks when hinting allocations (by adding hotness attributes) during cloning. However, we did not yet emit remarks when applying the hotness attributes while building the metadata (during matching and again after inlining). Add remarks when we apply the hint attributes for these non-context-sensitive allocations.
1 parent ed5eb1c commit 49d48c3

File tree

8 files changed

+55
-28
lines changed

8 files changed

+55
-28
lines changed

llvm/include/llvm/Analysis/MemoryProfileInfo.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@
1919
#include <map>
2020

2121
namespace llvm {
22+
23+
class OptimizationRemarkEmitter;
24+
2225
namespace memprof {
2326

2427
/// Build callstack metadata from the provided list of call stack ids. Returns
@@ -80,6 +83,10 @@ class CallStackTrie {
8083
// The allocation's leaf stack id.
8184
uint64_t AllocStackId = 0;
8285

86+
// If the client provides a remarks emitter object, we will emit remarks on
87+
// allocations for which we apply non-context-sensitive allocation hints.
88+
OptimizationRemarkEmitter *ORE;
89+
8390
void deleteTrieNode(CallStackTrieNode *Node) {
8491
if (!Node)
8592
return;
@@ -106,7 +113,7 @@ class CallStackTrie {
106113
uint64_t &ColdBytes);
107114

108115
public:
109-
CallStackTrie() = default;
116+
CallStackTrie(OptimizationRemarkEmitter *ORE = nullptr) : ORE(ORE) {}
110117
~CallStackTrie() { deleteTrieNode(Alloc); }
111118

112119
bool empty() const { return Alloc == nullptr; }

llvm/include/llvm/Transforms/Utils/Cloning.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class Instruction;
4242
class Loop;
4343
class LoopInfo;
4444
class Module;
45+
class OptimizationRemarkEmitter;
4546
class PGOContextualProfile;
4647
class ProfileSummaryInfo;
4748
class ReturnInst;
@@ -314,7 +315,8 @@ InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
314315
bool MergeAttributes = false,
315316
AAResults *CalleeAAR = nullptr,
316317
bool InsertLifetime = true,
317-
Function *ForwardVarArgsTo = nullptr);
318+
Function *ForwardVarArgsTo = nullptr,
319+
OptimizationRemarkEmitter *ORE = nullptr);
318320

319321
/// Same as above, but it will update the contextual profile. If the contextual
320322
/// profile is invalid (i.e. not loaded because it is not present), it defaults

llvm/lib/Analysis/MemoryProfileInfo.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "llvm/Analysis/MemoryProfileInfo.h"
14+
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
1415
#include "llvm/IR/Constants.h"
1516
#include "llvm/Support/CommandLine.h"
1617
#include "llvm/Support/Compiler.h"
@@ -95,13 +96,6 @@ std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
9596
llvm_unreachable("invalid alloc type");
9697
}
9798

98-
static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI,
99-
AllocationType AllocType) {
100-
auto AllocTypeString = getAllocTypeAttributeString(AllocType);
101-
auto A = llvm::Attribute::get(Ctx, "memprof", AllocTypeString);
102-
CI->addFnAttr(A);
103-
}
104-
10599
bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
106100
const unsigned NumAllocTypes = llvm::popcount(AllocTypes);
107101
assert(NumAllocTypes != 0);
@@ -425,7 +419,9 @@ bool CallStackTrie::buildMIBNodes(CallStackTrieNode *Node, LLVMContext &Ctx,
425419

426420
void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
427421
StringRef Descriptor) {
428-
addAllocTypeAttribute(CI->getContext(), CI, AT);
422+
auto AllocTypeString = getAllocTypeAttributeString(AT);
423+
auto A = llvm::Attribute::get(CI->getContext(), "memprof", AllocTypeString);
424+
CI->addFnAttr(A);
429425
if (MemProfReportHintedSizes) {
430426
std::vector<ContextTotalSize> ContextSizeInfo;
431427
collectContextSizeInfo(Alloc, ContextSizeInfo);
@@ -435,6 +431,12 @@ void CallStackTrie::addSingleAllocTypeAttribute(CallBase *CI, AllocationType AT,
435431
<< getAllocTypeAttributeString(AT) << ": " << TotalSize << "\n";
436432
}
437433
}
434+
if (ORE)
435+
ORE->emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CI)
436+
<< ore::NV("AllocationCall", CI) << " in function "
437+
<< ore::NV("Caller", CI->getFunction())
438+
<< " marked with memprof allocation attribute "
439+
<< ore::NV("Attribute", AllocTypeString));
438440
}
439441

440442
// Build and attach the minimal necessary MIB metadata. If the alloc has a

llvm/lib/Transforms/IPO/Inliner.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,9 +382,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
382382
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
383383
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
384384

385-
InlineResult IR =
386-
InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
387-
&FAM.getResult<AAManager>(*CB->getCaller()));
385+
InlineResult IR = InlineFunction(
386+
*CB, IFI, /*MergeAttributes=*/true,
387+
&FAM.getResult<AAManager>(*CB->getCaller()), true, nullptr,
388+
&FAM.getResult<OptimizationRemarkEmitterAnalysis>(*CB->getCaller()));
388389
if (!IR.isSuccess()) {
389390
Advice->recordUnsuccessfulInlining(IR);
390391
continue;

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/ADT/StringRef.h"
2121
#include "llvm/Analysis/MemoryBuiltins.h"
2222
#include "llvm/Analysis/MemoryProfileInfo.h"
23+
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2324
#include "llvm/Analysis/TargetLibraryInfo.h"
2425
#include "llvm/Analysis/ValueTracking.h"
2526
#include "llvm/IR/Constant.h"
@@ -956,7 +957,8 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
956957
const TargetLibraryInfo &TLI,
957958
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
958959
std::set<std::vector<uint64_t>> &MatchedCallSites,
959-
DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
960+
DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
961+
OptimizationRemarkEmitter &ORE) {
960962
auto &Ctx = M.getContext();
961963
// Previously we used getIRPGOFuncName() here. If F is local linkage,
962964
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -1123,7 +1125,7 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
11231125
// We may match this instruction's location list to multiple MIB
11241126
// contexts. Add them to a Trie specialized for trimming the contexts to
11251127
// the minimal needed to disambiguate contexts with unique behavior.
1126-
CallStackTrie AllocTrie;
1128+
CallStackTrie AllocTrie(&ORE);
11271129
uint64_t TotalSize = 0;
11281130
uint64_t TotalColdSize = 0;
11291131
for (auto *AllocInfo : AllocInfoIter->second) {
@@ -1270,8 +1272,9 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12701272
continue;
12711273

12721274
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1275+
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
12731276
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1274-
MatchedCallSites, UndriftMaps);
1277+
MatchedCallSites, UndriftMaps, ORE);
12751278
}
12761279

12771280
if (ClPrintMemProfMatchInfo) {

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -828,12 +828,13 @@ static void removeCallsiteMetadata(CallBase *Call) {
828828
}
829829

830830
static void updateMemprofMetadata(CallBase *CI,
831-
const std::vector<Metadata *> &MIBList) {
831+
const std::vector<Metadata *> &MIBList,
832+
OptimizationRemarkEmitter *ORE) {
832833
assert(!MIBList.empty());
833834
// Remove existing memprof, which will either be replaced or may not be needed
834835
// if we are able to use a single allocation type function attribute.
835836
removeMemProfMetadata(CI);
836-
CallStackTrie CallStack;
837+
CallStackTrie CallStack(ORE);
837838
for (Metadata *MIB : MIBList)
838839
CallStack.addCallStack(cast<MDNode>(MIB));
839840
bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
@@ -848,7 +849,8 @@ static void updateMemprofMetadata(CallBase *CI,
848849
// the call that was inlined.
849850
static void propagateMemProfHelper(const CallBase *OrigCall,
850851
CallBase *ClonedCall,
851-
MDNode *InlinedCallsiteMD) {
852+
MDNode *InlinedCallsiteMD,
853+
OptimizationRemarkEmitter *ORE) {
852854
MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
853855
MDNode *ClonedCallsiteMD = nullptr;
854856
// Check if the call originally had callsite metadata, and update it for the
@@ -891,7 +893,7 @@ static void propagateMemProfHelper(const CallBase *OrigCall,
891893
return;
892894
}
893895
if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
894-
updateMemprofMetadata(ClonedCall, NewMIBList);
896+
updateMemprofMetadata(ClonedCall, NewMIBList, ORE);
895897
}
896898

897899
// Update memprof related metadata (!memprof and !callsite) based on the
@@ -902,7 +904,8 @@ static void propagateMemProfHelper(const CallBase *OrigCall,
902904
static void
903905
propagateMemProfMetadata(Function *Callee, CallBase &CB,
904906
bool ContainsMemProfMetadata,
905-
const ValueMap<const Value *, WeakTrackingVH> &VMap) {
907+
const ValueMap<const Value *, WeakTrackingVH> &VMap,
908+
OptimizationRemarkEmitter *ORE) {
906909
MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
907910
// Only need to update if the inlined callsite had callsite metadata, or if
908911
// there was any memprof metadata inlined.
@@ -925,7 +928,7 @@ propagateMemProfMetadata(Function *Callee, CallBase &CB,
925928
removeCallsiteMetadata(ClonedCall);
926929
continue;
927930
}
928-
propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
931+
propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD, ORE);
929932
}
930933
}
931934

@@ -2473,7 +2476,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
24732476
bool MergeAttributes,
24742477
AAResults *CalleeAAR,
24752478
bool InsertLifetime,
2476-
Function *ForwardVarArgsTo) {
2479+
Function *ForwardVarArgsTo,
2480+
OptimizationRemarkEmitter *ORE) {
24772481
assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");
24782482

24792483
// FIXME: we don't inline callbr yet.
@@ -2807,8 +2811,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
28072811
// inlined function which use the same param.
28082812
AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);
28092813

2810-
propagateMemProfMetadata(CalledFunc, CB,
2811-
InlinedFunctionInfo.ContainsMemProfMetadata, VMap);
2814+
propagateMemProfMetadata(
2815+
CalledFunc, CB, InlinedFunctionInfo.ContainsMemProfMetadata, VMap, ORE);
28122816

28132817
// Propagate metadata on the callsite if necessary.
28142818
PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());

llvm/test/Transforms/Inline/memprof_inline.ll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,12 @@
2828
;; }
2929

3030

31-
; RUN: opt -passes=inline %s -S | FileCheck %s
31+
;; Also check that remarks are emitted when the allocations are hinted.
32+
; RUN: opt -passes=inline -pass-remarks=memory-profile-info %s -S 2>&1 | FileCheck %s
33+
34+
; CHECK: remark: memprof_inline.cc:5:10: call in function _Z4foo2v marked with memprof allocation attribute cold
35+
; CHECK: remark: memprof_inline.cc:5:10: call in function main marked with memprof allocation attribute notcold
36+
; CHECK: remark: memprof_inline.cc:5:10: call in function main marked with memprof allocation attribute cold
3237

3338
; ModuleID = 'memprof_inline.cc'
3439
source_filename = "memprof_inline.cc"

llvm/test/Transforms/PGOProfile/memprof.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@
7575
; RUN: opt < %s -passes='pgo-instr-use,memprof-use<profile-filename=%t.pgomemprofdata>' -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,PGO
7676

7777
;; Check that the total sizes are reported if requested. A message should be
78-
;; emitted for the pruned context.
79-
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES,TOTALSIZENOKEEPALL
78+
;; emitted for the pruned context. Also check that remarks are emitted for the
79+
;; allocations hinted without context sensitivity.
80+
; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes -pass-remarks=memory-profile-info 2>&1 | FileCheck %s --check-prefixes=TOTALSIZESSINGLE,TOTALSIZES,TOTALSIZENOKEEPALL,REMARKSINGLE
8081

8182
;; Check that the total sizes are reported if requested, and prevent pruning
8283
;; via -memprof-keep-all-not-cold-contexts.
@@ -397,7 +398,9 @@ for.end: ; preds = %for.cond
397398
; TOTALSIZESTHRESH60: Total size for full allocation context hash 18254812774972004394 and dominant alloc type cold: 10
398399
; TOTALSIZESTHRESH60: Total size for full allocation context hash 1093248920606587996 and dominant alloc type cold: 10
399400
; TOTALSIZESSINGLE: Total size for full allocation context hash 6792096022461663180 and single alloc type notcold: 10
401+
; REMARKSINGLE: remark: memprof.cc:25:13: call in function main marked with memprof allocation attribute notcold
400402
; TOTALSIZESSINGLE: Total size for full allocation context hash 15737101490731057601 and single alloc type cold: 10
403+
; REMARKSINGLE: remark: memprof.cc:26:13: call in function main marked with memprof allocation attribute cold
401404
;; For context sensitive allocations the full context hash and size in bytes
402405
;; are in separate metadata nodes included on the MIB metadata.
403406
; TOTALSIZES: !"cold", ![[CONTEXT1:[0-9]+]]}

0 commit comments

Comments
 (0)