Skip to content

Commit 1afb81d

Browse files
authored
[StructuralHash] Global Variable (#118412)
This update enhances the implementation of structural hashing for global variables by using their initial contents. Private global variables or constants are often used for metadata, where their names are not unique. This can produce different hash results even though the variables are effectively identical and could be merged by the linker.
- Refine the hashing of GlobalVariables for strings and for certain Objective-C metadata cases that have section names. This can be further extended to other scenarios.
- Expose StructuralHash for GlobalVariable so that this API can be utilized by MachineStableHashing, which is also employed in the global function outliner.
This change significantly improves size reduction, by an additional 1% on the LLD binary, when the global function outliner and merger are enabled together. As discussed in the RFC https://discourse.llvm.org/t/loh-conflicting-with-machineoutliner/83279/8?u=kyulee-com, if we disable or relocate the LOH pass, the size impact could increase to 4%.
1 parent c8b7ec2 commit 1afb81d

File tree

7 files changed

+244
-13
lines changed

7 files changed

+244
-13
lines changed

llvm/include/llvm/IR/StructuralHash.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ class Module;
3131
/// to true includes instruction and operand type information.
3232
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
3333

34+
/// Returns a hash of the global variable \p G.
35+
stable_hash StructuralHash(const GlobalVariable &G);
36+
3437
/// Returns a hash of the module \p M by hashing all functions and global
3538
/// variables contained within. \param M The module to hash. \param DetailedHash
3639
/// Whether or not to encode additional information in the function hashes that

llvm/lib/CodeGen/MachineStableHash.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#include "llvm/CodeGen/Register.h"
2828
#include "llvm/Config/llvm-config.h"
2929
#include "llvm/IR/Constants.h"
30+
#include "llvm/IR/GlobalVariable.h"
31+
#include "llvm/IR/StructuralHash.h"
3032
#include "llvm/MC/MCSymbol.h"
3133
#include "llvm/Support/Alignment.h"
3234
#include "llvm/Support/ErrorHandling.h"
@@ -93,13 +95,19 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
9395
return 0;
9496
case MachineOperand::MO_GlobalAddress: {
9597
const GlobalValue *GV = MO.getGlobal();
96-
if (!GV->hasName()) {
97-
++StableHashBailingGlobalAddress;
98-
return 0;
98+
stable_hash GVHash = 0;
99+
if (auto *GVar = dyn_cast<GlobalVariable>(GV))
100+
GVHash = StructuralHash(*GVar);
101+
if (!GVHash) {
102+
if (!GV->hasName()) {
103+
++StableHashBailingGlobalAddress;
104+
return 0;
105+
}
106+
GVHash = stable_hash_name(GV->getName());
99107
}
100-
auto Name = GV->getName();
101-
return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
102-
stable_hash_name(Name), MO.getOffset());
108+
109+
return stable_hash_combine(MO.getType(), MO.getTargetFlags(), GVHash,
110+
MO.getOffset());
103111
}
104112

105113
case MachineOperand::MO_TargetIndex: {

llvm/lib/IR/StructuralHash.cpp

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class StructuralHashImpl {
4646
/// Assign a unique ID to each Value in the order they are first seen.
4747
DenseMap<const Value *, int> ValueToId;
4848

49-
stable_hash hashType(Type *ValueType) {
49+
static stable_hash hashType(Type *ValueType) {
5050
SmallVector<stable_hash> Hashes;
5151
Hashes.emplace_back(ValueType->getTypeID());
5252
if (ValueType->isIntegerTy())
@@ -65,19 +65,47 @@ class StructuralHashImpl {
6565
}
6666
}
6767

68-
stable_hash hashAPInt(const APInt &I) {
68+
static stable_hash hashAPInt(const APInt &I) {
6969
SmallVector<stable_hash> Hashes;
7070
Hashes.emplace_back(I.getBitWidth());
7171
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
7272
Hashes.append(RawVals.begin(), RawVals.end());
7373
return stable_hash_combine(Hashes);
7474
}
7575

76-
stable_hash hashAPFloat(const APFloat &F) {
76+
static stable_hash hashAPFloat(const APFloat &F) {
7777
return hashAPInt(F.bitcastToAPInt());
7878
}
7979

80-
stable_hash hashGlobalValue(const GlobalValue *GV) {
80+
static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
81+
if (!GVar.hasInitializer())
82+
return hashGlobalValue(&GVar);
83+
84+
// Hash the contents of a string.
85+
if (GVar.getName().starts_with(".str")) {
86+
auto *C = GVar.getInitializer();
87+
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
88+
if (Seq->isString())
89+
return stable_hash_name(Seq->getAsString());
90+
}
91+
92+
// Hash structural contents of Objective-C metadata in specific sections.
93+
// This can be extended to other metadata if needed.
94+
static constexpr const char *SectionNames[] = {
95+
"__cfstring", "__cstring", "__objc_classrefs",
96+
"__objc_methname", "__objc_selrefs",
97+
};
98+
if (GVar.hasSection()) {
99+
StringRef SectionName = GVar.getSection();
100+
for (const char *Name : SectionNames)
101+
if (SectionName.contains(Name))
102+
return hashConstant(GVar.getInitializer());
103+
}
104+
105+
return hashGlobalValue(&GVar);
106+
}
107+
108+
static stable_hash hashGlobalValue(const GlobalValue *GV) {
81109
if (!GV->hasName())
82110
return 0;
83111
return stable_hash_name(GV->getName());
@@ -87,7 +115,7 @@ class StructuralHashImpl {
87115
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
88116
// we're interested in computing a hash rather than comparing two Constants.
89117
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
90-
stable_hash hashConstant(Constant *C) {
118+
static stable_hash hashConstant(const Constant *C) {
91119
SmallVector<stable_hash> Hashes;
92120

93121
Type *Ty = C->getType();
@@ -98,14 +126,21 @@ class StructuralHashImpl {
98126
return stable_hash_combine(Hashes);
99127
}
100128

129+
if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
130+
Hashes.emplace_back(hashGlobalVariable(*GVar));
131+
return stable_hash_combine(Hashes);
132+
}
133+
101134
if (auto *G = dyn_cast<GlobalValue>(C)) {
102135
Hashes.emplace_back(hashGlobalValue(G));
103136
return stable_hash_combine(Hashes);
104137
}
105138

106139
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
107-
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
108-
return stable_hash_combine(Hashes);
140+
if (Seq->isString()) {
141+
Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
142+
return stable_hash_combine(Hashes);
143+
}
109144
}
110145

111146
switch (C->getValueID()) {
@@ -266,6 +301,7 @@ class StructuralHashImpl {
266301
Hashes.emplace_back(Hash);
267302
Hashes.emplace_back(GlobalHeaderHash);
268303
Hashes.emplace_back(GV.getValueType()->getTypeID());
304+
Hashes.emplace_back(hashGlobalVariable(GV));
269305

270306
// Update the combined hash in place.
271307
Hash = stable_hash_combine(Hashes);
@@ -297,6 +333,10 @@ stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
297333
return H.getHash();
298334
}
299335

336+
stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
337+
return StructuralHashImpl::hashGlobalVariable(GVar);
338+
}
339+
300340
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
301341
StructuralHashImpl H(DetailedHash);
302342
H.update(M);
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; This test verifies that global variables (NSConstantString) are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
6+
7+
; CHECK: _f1.Tgm
8+
; CHECK: _f2.Tgm
9+
10+
%struct.__NSConstantString_tag = type { ptr, i32, ptr, i64 }
11+
@__CFConstantStringClassReference = external global [0 x i32]
12+
@.str.2 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
13+
@_unnamed_cfstring_ = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.2, i64 8 }, section "__DATA,__cfstring", align 8
14+
15+
@.str.3 = private unnamed_addr constant [9 x i8] c"cfstring\00", section "__TEXT,__cstring,cstring_literals", align 1
16+
@_unnamed_cfstring_.2 = private global %struct.__NSConstantString_tag { ptr @__CFConstantStringClassReference, i32 1992, ptr @.str.3, i64 8 }, section "__DATA,__cfstring", align 8
17+
18+
declare i32 @hoo(ptr noundef)
19+
20+
define i32 @f1() {
21+
entry:
22+
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_)
23+
%add = sub nsw i32 %call, 1
24+
ret i32 %add
25+
}
26+
27+
define i32 @f2() {
28+
entry:
29+
%call = tail call i32 @hoo(ptr noundef nonnull @_unnamed_cfstring_.2)
30+
%add = sub nsw i32 %call, 1
31+
ret i32 %add
32+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; This test verifies that global variables (objc metadata) are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
6+
7+
; CHECK: _f1.Tgm
8+
; CHECK: _f2.Tgm
9+
10+
%struct._class_t = type { ptr, ptr, ptr, ptr, ptr }
11+
12+
@"OBJC_CLASS_$_MyClass" = external global %struct._class_t
13+
@"OBJC_CLASSLIST_REFERENCES_$_" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
14+
@"OBJC_CLASSLIST_REFERENCES_$_.1" = internal global ptr @"OBJC_CLASS_$_MyClass", section "__DATA,__objc_classrefs,regular,no_dead_strip", align 8
15+
16+
@OBJC_METH_VAR_NAME_ = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
17+
@OBJC_METH_VAR_NAME_.1 = private unnamed_addr constant [6 x i8] c"hello\00", section "__TEXT,__objc_methname,cstring_literals", align 1
18+
19+
@OBJC_SELECTOR_REFERENCES_ = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
20+
@OBJC_SELECTOR_REFERENCES_.1 = internal externally_initialized global ptr @OBJC_METH_VAR_NAME_.1, section "__DATA,__objc_selrefs,literal_pointers,no_dead_strip", align 8
21+
22+
declare ptr @objc_msgSend(ptr, ptr, ...)
23+
24+
define i32 @f1() {
25+
entry:
26+
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_", align 8
27+
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_, align 8
28+
%call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
29+
ret i32 %call
30+
}
31+
32+
define i32 @f2() {
33+
entry:
34+
%0 = load ptr, ptr @"OBJC_CLASSLIST_REFERENCES_$_.1", align 8
35+
%1 = load ptr, ptr @OBJC_SELECTOR_REFERENCES_.1, align 8
36+
%call = tail call i32 @objc_msgSend(ptr noundef %0, ptr noundef %1)
37+
ret i32 %call
38+
}
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; This test verifies that global variables (string) are hashed based on their initial contents,
2+
; allowing them to be merged even if they appear different due to their names.
3+
; Now they become identical functions that can be merged without creating a parameter.
4+
5+
; RUN: llc -mtriple=arm64-apple-darwin -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s
6+
7+
; CHECK: _f1.Tgm
8+
; CHECK: _f2.Tgm
9+
; CHECK-NOT: _f3.Tgm
10+
; CHECK-NOT: _f4.Tgm
11+
12+
; The initial contents of `.str` and `.str.1` are identical, while those of `.str.2` and `.str.3` differ from them and from each other.
13+
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
14+
@.str.1 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
15+
@.str.2 = private unnamed_addr constant [6 x i8] c"diff2\00", align 1
16+
@.str.3 = private unnamed_addr constant [6 x i8] c"diff3\00", align 1
17+
18+
declare i32 @goo(ptr noundef)
19+
20+
define i32 @f1() {
21+
entry:
22+
%call = tail call i32 @goo(ptr noundef nonnull @.str)
23+
%add = add nsw i32 %call, 1
24+
ret i32 %add
25+
}
26+
27+
define i32 @f2() {
28+
entry:
29+
%call = tail call i32 @goo(ptr noundef nonnull @.str.1)
30+
%add = add nsw i32 %call, 1
31+
ret i32 %add
32+
}
33+
34+
define i32 @f3() {
35+
entry:
36+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2)
37+
%add = sub nsw i32 %call, 1
38+
ret i32 %add
39+
}
40+
41+
define i32 @f4() {
42+
entry:
43+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.3)
44+
%add = sub nsw i32 %call, 1
45+
ret i32 %add
46+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; This test verifies that global variables are hashed based on their initial contents,
2+
; allowing them to be outlined even if they appear different due to their names.
3+
4+
; RUN: split-file %s %t
5+
6+
; The outlined function is created locally.
7+
; Note that `.str.3` is used by both `f1()` and `f2()`.
8+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \
9+
; RUN: %t/local-two.ll -o - | FileCheck %s --check-prefix=WRITE
10+
11+
; WRITE-LABEL: _OUTLINED_FUNCTION_{{.*}}:
12+
; WRITE: adrp x1, l_.str.3
13+
; WRITE-NEXT: add x1, x1, l_.str.3
14+
; WRITE-NEXT: mov w2
15+
; WRITE-NEXT: mov w3
16+
; WRITE-NEXT: mov w4
17+
; WRITE-NEXT: b
18+
19+
; Create an object file and merge it into the cgdata.
20+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-generate -aarch64-enable-collect-loh=false \
21+
; RUN: -filetype=obj %t/local-two.ll -o %t_write_base
22+
; RUN: llvm-cgdata --merge %t_write_base -o %t_cgdata_base
23+
24+
; Read the cgdata in the machine outliner for optimistically outlining in local-one.ll.
25+
; Note that the hash of `.str.5` in local-one.ll matches that of `.str.3` in an outlined tree in the cgdata.
26+
27+
; RUN: llc -mtriple=arm64-apple-darwin -enable-machine-outliner -codegen-data-use-path=%t_cgdata_base -aarch64-enable-collect-loh=false \
28+
; RUN: %t/local-one.ll -o - | FileCheck %s --check-prefix=READ
29+
30+
; READ-LABEL: _OUTLINED_FUNCTION_{{.*}}:
31+
; READ: adrp x1, l_.str.5
32+
; READ-NEXT: add x1, x1, l_.str.5
33+
; READ-NEXT: mov w2
34+
; READ-NEXT: mov w3
35+
; READ-NEXT: mov w4
36+
; READ-NEXT: b
37+
38+
;--- local-two.ll
39+
@.str.1 = private unnamed_addr constant [3 x i8] c"f1\00", align 1
40+
@.str.2 = private unnamed_addr constant [3 x i8] c"f2\00", align 1
41+
@.str.3 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
42+
43+
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
44+
define i32 @f1() minsize {
45+
entry:
46+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.1, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3)
47+
ret i32 %call
48+
}
49+
define i32 @f2() minsize {
50+
entry:
51+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.2, ptr noundef nonnull @.str.3, i32 1, i32 2, i32 3)
52+
ret i32 %call
53+
}
54+
55+
;--- local-one.ll
56+
@.str.4 = private unnamed_addr constant [3 x i8] c"f3\00", align 1
57+
@.str.5 = private unnamed_addr constant [6 x i8] c"hello\00", align 1
58+
59+
declare noundef i32 @goo(ptr noundef, ptr noundef, i32, i32, i32)
60+
define i32 @f1() minsize {
61+
entry:
62+
%call = tail call noundef i32 @goo(ptr noundef nonnull @.str.4, ptr noundef nonnull @.str.5, i32 1, i32 2, i32 3)
63+
ret i32 %call
64+
}

0 commit comments

Comments
 (0)