Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit fa405a6

Browse files
committed
[MBP] Code cleanup #3 /NFC
This is third patch to clean up the code. Included in this patch: 1. Further unclutter trace/chain formation main routine; 2. Isolate the logic to compute global cost/conflict detection into its own method; 3. Heavily document the selection algorithm; 4. Added helper hook to allow PGO specific logic to be added in the future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272582 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6027190 commit fa405a6

File tree

1 file changed

+137
-43
lines changed

1 file changed

+137
-43
lines changed

lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 137 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
288288
const BlockFilterSet *BlockFilter,
289289
BranchProbability SuccProb,
290290
BranchProbability HotProb);
291+
bool
292+
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
293+
BlockChain &SuccChain, BranchProbability SuccProb,
294+
BranchProbability RealSuccProb, BlockChain &Chain,
295+
const BlockFilterSet *BlockFilter);
291296
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
292297
BlockChain &Chain,
293298
const BlockFilterSet *BlockFilter);
@@ -512,6 +517,128 @@ bool MachineBlockPlacement::shouldPredBlockBeOutlined(
512517
return false;
513518
}
514519

520+
// FIXME (PGO handling)
521+
// For now this method just returns a fixed threshold. It needs to be enhanced
522+
// such that BB and Succ are passed in so that CFG shapes are examined such that
523+
// the threshold is computed with more precise cost model when PGO is on.
524+
static BranchProbability getLayoutSuccessorProbThreshold() {
525+
BranchProbability HotProb(StaticLikelyProb, 100);
526+
return HotProb;
527+
}
528+
529+
/// Checks to see if the layout candidate block \p Succ has a better layout
530+
/// predecessor than \c BB. If yes, returns true.
531+
bool MachineBlockPlacement::hasBetterLayoutPredecessor(
532+
MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
533+
BranchProbability SuccProb, BranchProbability RealSuccProb,
534+
BlockChain &Chain, const BlockFilterSet *BlockFilter) {
535+
536+
// There is no global conflict, just return false.
537+
if (SuccChain.UnscheduledPredecessors == 0)
538+
return false;
539+
540+
// There are two basic scenarios here:
541+
// -------------------------------------
542+
// Case 1: triangular shape CFG:
543+
// BB
544+
// | \
545+
// | \
546+
// | Pred
547+
// | /
548+
// Succ
549+
// In this case, we are evaluating whether to select edge BB->Succ, i.e.
550+
// set Succ as the layout successor of BB. Picking Succ as BB's
551+
// successor breaks the CFG constraints. With this layout, Pred BB
552+
// is forced to be outlined, so the overall cost will be cost of the
553+
// branch taken from BB to Pred, plus the cost of back taken branch
554+
// from Pred to Succ, as well as the additional cost associated
555+
// with the needed unconditional jump instruction from Pred To Succ.
556+
// The cost of the topological order layout is the taken branch cost
557+
// from BB to Succ, so to make BB->Succ a viable candidate, the following
558+
// must hold:
559+
// 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
560+
// < freq(BB->Succ) * taken_branch_cost.
561+
// Ignoring unconditional jump cost, we get
562+
// freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
563+
// prob(BB->Succ) > 2 * prob(BB->Pred)
564+
//
565+
// When real profile data is available, we can precisely compute the
566+
// probability threshold that is needed for edge BB->Succ to be considered.
567+
// Without profile data, the heuristic requires the branch bias to be
568+
// a lot larger to make sure the signal is very strong (e.g. 80% default).
569+
// -----------------------------------------------------------------
570+
// Case 2: diamond like CFG:
571+
// S
572+
// / \
573+
// | \
574+
// BB Pred
575+
// \ /
576+
// Succ
577+
// ..
578+
// In this case, edge S->BB has already been selected, and we are evaluating
579+
// candidate edge BB->Succ. Edge S->BB is selected because prob(S->BB)
580+
// is no less than prob(S->Pred). When real profile data is *available*, if
581+
// the condition is true, it will be always better to continue the trace with
582+
// edge BB->Succ instead of laying out with topological order (i.e. laying
583+
// Pred first). The cost of S->BB->Succ is 2 * freq (S->Pred), while with
584+
// the topo order, the cost is freq(S->Pred) + freq(S->BB) which is larger.
585+
// When profile data is not available, however, we need to be more
586+
// conservative. If the branch prediction is wrong, breaking the topo-order
587+
// will actually yield a layout with large cost. For this reason, we need
588+
// a strongly biased branch at block S with Prob(S->BB) in order to select
589+
// BB->Succ. This is equivalent to looking at the CFG backward with backward
590+
// edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected (without
591+
// profile data).
592+
593+
BranchProbability HotProb = getLayoutSuccessorProbThreshold();
594+
595+
// Forward checking. For case 2, SuccProb will be 1.
596+
if (SuccProb < HotProb) {
597+
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
598+
<< " (prob) (CFG conflict)\n");
599+
return true;
600+
}
601+
602+
// Make sure that a hot successor doesn't have a globally more
603+
// important predecessor.
604+
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
605+
bool BadCFGConflict = false;
606+
607+
for (MachineBasicBlock *Pred : Succ->predecessors()) {
608+
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
609+
(BlockFilter && !BlockFilter->count(Pred)) ||
610+
BlockToChain[Pred] == &Chain)
611+
continue;
612+
// Do backward checking. For case 1, it is actually redundant check. For
613+
// case 2 above, we need a backward checking to filter out edges that are
614+
// not 'strongly' biased. With profile data available, the check is mostly
615+
// redundant too (when threshold prob is set at 50%) unless S has more than
616+
// two successors.
617+
// BB Pred
618+
// \ /
619+
// Succ
620+
// We select edge BB->Succ if
621+
// freq(BB->Succ) > freq(Succ) * HotProb
622+
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
623+
// HotProb
624+
// i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
625+
BlockFrequency PredEdgeFreq =
626+
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
627+
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
628+
BadCFGConflict = true;
629+
break;
630+
}
631+
}
632+
633+
if (BadCFGConflict) {
634+
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
635+
<< " (prob) (non-cold CFG conflict)\n");
636+
return true;
637+
}
638+
639+
return false;
640+
}
641+
515642
/// \brief Select the best successor for a block.
516643
///
517644
/// This looks across all successors of a particular block and attempts to
@@ -545,51 +672,18 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
545672
HotProb))
546673
return Succ;
547674

548-
// Only consider successors which are either "hot", or wouldn't violate
549-
// any CFG constraints.
550675
BlockChain &SuccChain = *BlockToChain[Succ];
551-
if (SuccChain.UnscheduledPredecessors != 0) {
552-
if (SuccProb < HotProb) {
553-
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
554-
<< " (prob) (CFG conflict)\n");
555-
continue;
556-
}
557-
558-
// Make sure that a hot successor doesn't have a globally more
559-
// important predecessor.
560-
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
561-
bool BadCFGConflict = false;
562-
for (MachineBasicBlock *Pred : Succ->predecessors()) {
563-
if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
564-
(BlockFilter && !BlockFilter->count(Pred)) ||
565-
BlockToChain[Pred] == &Chain)
566-
continue;
567-
BlockFrequency PredEdgeFreq =
568-
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
569-
// A B
570-
// \ /
571-
// C
572-
// We layout ACB iff A.freq > C.freq * HotProb
573-
// i.e. A.freq > A.freq * HotProb + B.freq * HotProb
574-
// i.e. A.freq * (1 - HotProb) > B.freq * HotProb
575-
// A: CandidateEdge
576-
// B: PredEdge
577-
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
578-
BadCFGConflict = true;
579-
break;
580-
}
581-
}
582-
if (BadCFGConflict) {
583-
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
584-
<< " (prob) (non-cold CFG conflict)\n");
585-
continue;
586-
}
587-
}
676+
// Skip the edge \c BB->Succ if block \c Succ has a better layout
677+
// predecessor that yields lower global cost.
678+
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
679+
Chain, BlockFilter))
680+
continue;
588681

589-
DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
590-
<< " (prob)"
591-
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
592-
<< "\n");
682+
DEBUG(
683+
dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
684+
<< " (prob)"
685+
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
686+
<< "\n");
593687
if (BestSucc && BestProb >= SuccProb)
594688
continue;
595689
BestSucc = Succ;

0 commit comments

Comments
 (0)