Skip to content

Commit 3c357a4

Browse files
authored
[BOLT] Add support for safe-icf (#116275)
Identical Code Folding (ICF) folds functions that are identical into one function, and updates symbol addresses to the new address. This reduces the size of a binary, but can lead to problems. For example when function pointers are compared. This can be done either explicitly in the code or generated IR by optimization passes like Indirect Call Promotion (ICP). After ICF what used to be two different addresses become the same address. This can lead to a different code path being taken. This is where safe ICF comes in. Linker (LLD) does it using address significant section generated by clang. If symbol is in it, or an object doesn't have this section symbols are not folded. BOLT does not have the information regarding which objects do not have this section, so can't re-use this mechanism. This implementation scans code section and conservatively marks functions symbols as unsafe. It treats symbols as unsafe if they are used in non-control flow instruction. It also scans through the data relocation sections and does the same for relocations that reference a function symbol. The latter handles the case when function pointer is stored in a local or global variable, etc. If a relocation address points within a vtable these symbols are skipped.
1 parent eb5c211 commit 3c357a4

13 files changed

+644
-19
lines changed

bolt/docs/CommandLineArgumentReference.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,9 +498,12 @@
498498
Automatically put hot code on 2MB page(s) (hugify) at runtime. No manual call
499499
to hugify is needed in the binary (which is what --hot-text relies on).
500500

501-
- `--icf`
501+
- `--icf=<value>`
502502

503503
Fold functions with identical code
504+
- `all`: Enable identical code folding
505+
- `none`: Disable identical code folding (default)
506+
- `safe`: Enable safe identical code folding
504507

505508
- `--icp`
506509

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ class BinaryFunction {
428428
/// Function order for streaming into the destination binary.
429429
uint32_t Index{-1U};
430430

431+
/// Function is referenced by a non-control flow instruction.
432+
bool HasAddressTaken{false};
433+
431434
/// Get basic block index assuming it belongs to this function.
432435
unsigned getIndex(const BinaryBasicBlock *BB) const {
433436
assert(BB->getIndex() < BasicBlocks.size());
@@ -822,6 +825,14 @@ class BinaryFunction {
822825
return nullptr;
823826
}
824827

828+
/// Return true if function is referenced in a non-control flow instruction.
829+
/// This flag is set when the code and relocation analyses are being
830+
/// performed, which occurs when safe ICF (Identical Code Folding) is enabled.
831+
bool hasAddressTaken() const { return HasAddressTaken; }
832+
833+
/// Set whether function is referenced in a non-control flow instruction.
834+
void setHasAddressTaken(bool AddressTaken) { HasAddressTaken = AddressTaken; }
835+
825836
/// Returns the raw binary encoding of this function.
826837
ErrorOr<ArrayRef<uint8_t>> getData() const;
827838

@@ -2135,6 +2146,9 @@ class BinaryFunction {
21352146
// adjustments.
21362147
void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset);
21372148

2149+
/// Analyze instruction to identify a function reference.
2150+
void analyzeInstructionForFuncReference(const MCInst &Inst);
2151+
21382152
/// Scan function for references to other functions. In relocation mode,
21392153
/// add relocations for external references. In non-relocation mode, detect
21402154
/// and mark new entry points.

bolt/include/bolt/Passes/IdenticalCodeFolding.h

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include "bolt/Core/BinaryFunction.h"
1313
#include "bolt/Passes/BinaryPasses.h"
14+
#include "llvm/ADT/SparseBitVector.h"
1415

1516
namespace llvm {
1617
namespace bolt {
@@ -20,22 +21,72 @@ namespace bolt {
2021
///
2122
class IdenticalCodeFolding : public BinaryFunctionPass {
2223
protected:
23-
bool shouldOptimize(const BinaryFunction &BF) const override {
24-
if (BF.hasUnknownControlFlow())
25-
return false;
26-
if (BF.isFolded())
27-
return false;
28-
if (BF.hasSDTMarker())
29-
return false;
30-
return BinaryFunctionPass::shouldOptimize(BF);
31-
}
24+
/// Return true if the function is safe to fold.
25+
bool shouldOptimize(const BinaryFunction &BF) const override;
3226

3327
public:
28+
enum class ICFLevel {
29+
None, /// No ICF. (Default)
30+
Safe, /// Safe ICF.
31+
All, /// Aggressive ICF.
32+
};
3433
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
3534
: BinaryFunctionPass(PrintPass) {}
3635

3736
const char *getName() const override { return "identical-code-folding"; }
3837
Error runOnFunctions(BinaryContext &BC) override;
38+
39+
private:
40+
/// Bit vector of memory addresses of vtables.
41+
llvm::SparseBitVector<> VTableBitVector;
42+
43+
/// Return true if the memory address is in a vtable.
44+
bool isAddressInVTable(uint64_t Address) const {
45+
return VTableBitVector.test(Address / 8);
46+
}
47+
48+
/// Mark memory address of a vtable as used.
49+
void setAddressUsedInVTable(uint64_t Address) {
50+
VTableBitVector.set(Address / 8);
51+
}
52+
53+
/// Scan symbol table and mark memory addresses of
54+
/// vtables.
55+
void initVTableReferences(const BinaryContext &BC);
56+
57+
/// Analyze code section and relocations and mark functions that are not
58+
/// safe to fold.
59+
void markFunctionsUnsafeToFold(BinaryContext &BC);
60+
61+
/// Process static and dynamic relocations in the data sections to identify
62+
/// function references, and mark them as unsafe to fold. It filters out
63+
/// symbol references that are in vtables.
64+
void analyzeDataRelocations(BinaryContext &BC);
65+
66+
/// Process functions that have been disassembled and mark functions that are
67+
/// used in non-control flow instructions as unsafe to fold.
68+
void analyzeFunctions(BinaryContext &BC);
69+
};
70+
71+
class DeprecatedICFNumericOptionParser
72+
: public cl::parser<IdenticalCodeFolding::ICFLevel> {
73+
public:
74+
explicit DeprecatedICFNumericOptionParser(cl::Option &O)
75+
: cl::parser<IdenticalCodeFolding::ICFLevel>(O) {}
76+
77+
bool parse(cl::Option &O, StringRef ArgName, StringRef Arg,
78+
IdenticalCodeFolding::ICFLevel &Value) {
79+
if (Arg == "0" || Arg == "1") {
80+
Value = (Arg == "0") ? IdenticalCodeFolding::ICFLevel::None
81+
: IdenticalCodeFolding::ICFLevel::All;
82+
errs() << formatv("BOLT-WARNING: specifying numeric value \"{0}\" "
83+
"for option -{1} is deprecated\n",
84+
Arg, ArgName);
85+
return false;
86+
}
87+
return cl::parser<IdenticalCodeFolding::ICFLevel>::parse(O, ArgName, Arg,
88+
Value);
89+
}
3990
};
4091

4192
} // namespace bolt

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,20 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
15041504
return Target;
15051505
}
15061506

1507+
void BinaryFunction::analyzeInstructionForFuncReference(const MCInst &Inst) {
1508+
for (const MCOperand &Op : MCPlus::primeOperands(Inst)) {
1509+
if (!Op.isExpr())
1510+
continue;
1511+
const MCExpr &Expr = *Op.getExpr();
1512+
if (Expr.getKind() != MCExpr::SymbolRef)
1513+
continue;
1514+
const MCSymbol &Symbol = cast<MCSymbolRefExpr>(Expr).getSymbol();
1515+
// Set HasAddressTaken for a function regardless of the ICF level.
1516+
if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol))
1517+
BF->setHasAddressTaken(true);
1518+
}
1519+
}
1520+
15071521
bool BinaryFunction::scanExternalRefs() {
15081522
bool Success = true;
15091523
bool DisassemblyFailed = false;
@@ -1624,6 +1638,8 @@ bool BinaryFunction::scanExternalRefs() {
16241638
[](const MCOperand &Op) { return Op.isExpr(); })) {
16251639
// Skip assembly if the instruction may not have any symbolic operands.
16261640
continue;
1641+
} else {
1642+
analyzeInstructionForFuncReference(Instruction);
16271643
}
16281644

16291645
// Emit the instruction using temp emitter and generate relocations.

bolt/lib/Passes/IdenticalCodeFolding.cpp

Lines changed: 105 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include "bolt/Core/ParallelUtilities.h"
1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/Support/CommandLine.h"
18+
#include "llvm/Support/FormatVariadic.h"
1819
#include "llvm/Support/ThreadPool.h"
1920
#include "llvm/Support/Timer.h"
2021
#include <atomic>
@@ -42,8 +43,41 @@ TimeICF("time-icf",
4243
cl::ReallyHidden,
4344
cl::ZeroOrMore,
4445
cl::cat(BoltOptCategory));
46+
47+
cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
48+
DeprecatedICFNumericOptionParser>
49+
ICF("icf", cl::desc("fold functions with identical code"),
50+
cl::init(bolt::IdenticalCodeFolding::ICFLevel::None),
51+
cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all",
52+
"Enable identical code folding"),
53+
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "1",
54+
"Enable identical code folding"),
55+
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "",
56+
"Enable identical code folding"),
57+
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None,
58+
"none",
59+
"Disable identical code folding (default)"),
60+
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "0",
61+
"Disable identical code folding (default)"),
62+
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe,
63+
"safe", "Enable safe identical code folding")),
64+
cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory));
4565
} // namespace opts
4666

67+
bool IdenticalCodeFolding::shouldOptimize(const BinaryFunction &BF) const {
68+
if (BF.hasUnknownControlFlow())
69+
return false;
70+
if (BF.isFolded())
71+
return false;
72+
if (BF.hasSDTMarker())
73+
return false;
74+
if (BF.isPseudo())
75+
return false;
76+
if (opts::ICF == ICFLevel::Safe && BF.hasAddressTaken())
77+
return false;
78+
return BinaryFunctionPass::shouldOptimize(BF);
79+
}
80+
4781
/// Compare two jump tables in 2 functions. The function relies on consistent
4882
/// ordering of basic blocks in both binary functions (e.g. DFS).
4983
static bool equalJumpTables(const JumpTable &JumpTableA,
@@ -340,6 +374,74 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
340374

341375
namespace llvm {
342376
namespace bolt {
377+
void IdenticalCodeFolding::initVTableReferences(const BinaryContext &BC) {
378+
for (const auto &[Address, Data] : BC.getBinaryData()) {
379+
// Filter out all symbols that are not vtables.
380+
if (!Data->getName().starts_with("_ZTV"))
381+
continue;
382+
for (uint64_t I = Address, End = I + Data->getSize(); I < End; I += 8)
383+
setAddressUsedInVTable(I);
384+
}
385+
}
386+
387+
void IdenticalCodeFolding::analyzeDataRelocations(BinaryContext &BC) {
388+
initVTableReferences(BC);
389+
// For static relocations there should be a symbol for function references.
390+
for (const BinarySection &Sec : BC.sections()) {
391+
if (!Sec.hasSectionRef() || !Sec.isData())
392+
continue;
393+
for (const auto &Rel : Sec.relocations()) {
394+
const uint64_t RelAddr = Rel.Offset + Sec.getAddress();
395+
if (isAddressInVTable(RelAddr))
396+
continue;
397+
if (BinaryFunction *BF = BC.getFunctionForSymbol(Rel.Symbol))
398+
BF->setHasAddressTaken(true);
399+
}
400+
// For dynamic relocations there are two cases:
401+
// 1: No symbol and only addend.
402+
// 2: There is a symbol, but it does not references a function in a binary.
403+
for (const auto &Rel : Sec.dynamicRelocations()) {
404+
const uint64_t RelAddr = Rel.Offset + Sec.getAddress();
405+
if (isAddressInVTable(RelAddr))
406+
continue;
407+
if (BinaryFunction *BF = BC.getBinaryFunctionAtAddress(Rel.Addend))
408+
BF->setHasAddressTaken(true);
409+
}
410+
}
411+
}
412+
413+
void IdenticalCodeFolding::analyzeFunctions(BinaryContext &BC) {
414+
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
415+
for (const BinaryBasicBlock &BB : BF)
416+
for (const MCInst &Inst : BB)
417+
if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst)))
418+
BF.analyzeInstructionForFuncReference(Inst);
419+
};
420+
ParallelUtilities::PredicateTy SkipFunc =
421+
[&](const BinaryFunction &BF) -> bool { return !BF.hasCFG(); };
422+
ParallelUtilities::runOnEachFunction(
423+
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
424+
SkipFunc, "markUnsafe");
425+
426+
LLVM_DEBUG({
427+
for (const auto &BFIter : BC.getBinaryFunctions()) {
428+
if (!BFIter.second.hasAddressTaken())
429+
continue;
430+
dbgs() << "BOLT-DEBUG: skipping function with reference taken "
431+
<< BFIter.second.getOneName() << '\n';
432+
}
433+
});
434+
}
435+
436+
void IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) {
437+
NamedRegionTimer MarkFunctionsUnsafeToFoldTimer(
438+
"markFunctionsUnsafeToFold", "markFunctionsUnsafeToFold", "ICF breakdown",
439+
"ICF breakdown", opts::TimeICF);
440+
if (!BC.isX86())
441+
BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n";
442+
analyzeDataRelocations(BC);
443+
analyzeFunctions(BC);
444+
}
343445

344446
Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
345447
const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
@@ -385,7 +487,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
385487
"ICF breakdown", opts::TimeICF);
386488
for (auto &BFI : BC.getBinaryFunctions()) {
387489
BinaryFunction &BF = BFI.second;
388-
if (!this->shouldOptimize(BF))
490+
if (!shouldOptimize(BF))
389491
continue;
390492
CongruentBuckets[&BF].emplace(&BF);
391493
}
@@ -475,7 +577,8 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
475577

476578
LLVM_DEBUG(SinglePass.stopTimer());
477579
};
478-
580+
if (opts::ICF == ICFLevel::Safe)
581+
markFunctionsUnsafeToFold(BC);
479582
hashFunctions();
480583
createCongruentBuckets();
481584

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ extern cl::opt<bool> PrintDynoStats;
5454
extern cl::opt<bool> DumpDotAll;
5555
extern cl::opt<std::string> AsmDump;
5656
extern cl::opt<bolt::PLTCall::OptType> PLT;
57+
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
58+
llvm::bolt::DeprecatedICFNumericOptionParser>
59+
ICF;
5760

5861
static cl::opt<bool>
5962
DynoStatsAll("dyno-stats-all",
@@ -65,9 +68,6 @@ static cl::opt<bool>
6568
cl::desc("eliminate unreachable code"), cl::init(true),
6669
cl::cat(BoltOptCategory));
6770

68-
cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
69-
cl::cat(BoltOptCategory));
70-
7171
static cl::opt<bool> JTFootprintReductionFlag(
7272
"jt-footprint-reduction",
7373
cl::desc("make jump tables size smaller at the cost of using more "
@@ -403,7 +403,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
403403
opts::StripRepRet);
404404

405405
Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
406-
opts::ICF);
406+
opts::ICF != IdenticalCodeFolding::ICFLevel::None);
407407

408408
Manager.registerPass(
409409
std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
@@ -428,7 +428,7 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
428428
Manager.registerPass(std::make_unique<Inliner>(PrintInline));
429429

430430
Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
431-
opts::ICF);
431+
opts::ICF != IdenticalCodeFolding::ICFLevel::None);
432432

433433
Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
434434

bolt/lib/Rewrite/BoltDiff.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ using namespace bolt;
2828
namespace opts {
2929
extern cl::OptionCategory BoltDiffCategory;
3030
extern cl::opt<bool> NeverPrint;
31-
extern cl::opt<bool> ICF;
31+
extern cl::opt<bolt::IdenticalCodeFolding::ICFLevel, false,
32+
llvm::bolt::DeprecatedICFNumericOptionParser>
33+
ICF;
3234

3335
static cl::opt<bool> IgnoreLTOSuffix(
3436
"ignore-lto-suffix",
@@ -697,7 +699,7 @@ void RewriteInstance::compare(RewriteInstance &RI2) {
697699
}
698700

699701
// Pre-pass ICF
700-
if (opts::ICF) {
702+
if (opts::ICF != IdenticalCodeFolding::ICFLevel::None) {
701703
IdenticalCodeFolding ICF(opts::NeverPrint);
702704
outs() << "BOLT-DIFF: Starting ICF pass for binary 1";
703705
BC->logBOLTErrorsAndQuitOnFatal(ICF.runOnFunctions(*BC));

0 commit comments

Comments
 (0)