@@ -288,6 +288,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
288
288
const BlockFilterSet *BlockFilter,
289
289
BranchProbability SuccProb,
290
290
BranchProbability HotProb);
291
+ bool
292
+ hasBetterLayoutPredecessor (MachineBasicBlock *BB, MachineBasicBlock *Succ,
293
+ BlockChain &SuccChain, BranchProbability SuccProb,
294
+ BranchProbability RealSuccProb, BlockChain &Chain,
295
+ const BlockFilterSet *BlockFilter);
291
296
MachineBasicBlock *selectBestSuccessor (MachineBasicBlock *BB,
292
297
BlockChain &Chain,
293
298
const BlockFilterSet *BlockFilter);
@@ -512,6 +517,128 @@ bool MachineBlockPlacement::shouldPredBlockBeOutlined(
512
517
return false ;
513
518
}
514
519
520
+ // FIXME (PGO handling)
521
+ // For now this method just returns a fixed threshold. It needs to be enhanced
522
+ // such that BB and Succ are passed in so that CFG shapes are examined such that
523
+ // the threshold is computed with more precise cost model when PGO is on.
524
+ static BranchProbability getLayoutSuccessorProbThreshold () {
525
+ BranchProbability HotProb (StaticLikelyProb, 100 );
526
+ return HotProb;
527
+ }
528
+
529
+ /// Checks to see if the layout candidate block \p Succ has a better layout
530
+ /// predecessor than \c BB. If yes, returns true.
531
+ bool MachineBlockPlacement::hasBetterLayoutPredecessor (
532
+ MachineBasicBlock *BB, MachineBasicBlock *Succ, BlockChain &SuccChain,
533
+ BranchProbability SuccProb, BranchProbability RealSuccProb,
534
+ BlockChain &Chain, const BlockFilterSet *BlockFilter) {
535
+
536
+ // There is no global conflict, just return false.
537
+ if (SuccChain.UnscheduledPredecessors == 0 )
538
+ return false ;
539
+
540
+ // There are two basic scenarios here:
541
+ // -------------------------------------
542
+ // Case 1: triangular shape CFG:
543
+ // BB
544
+ // | \
545
+ // | \
546
+ // | Pred
547
+ // | /
548
+ // Succ
549
+ // In this case, we are evaluating whether to select edge BB->Succ, i.e.
550
+ // set Succ as the layout successor of BB. Picking Succ as BB's
551
+ // successor breaks the CFG constraints. With this layout, Pred BB
552
+ // is forced to be outlined, so the overall cost will be cost of the
553
+ // branch taken from BB to Pred, plus the cost of back taken branch
554
+ // from Pred to Succ, as well as the additional cost associated
555
+ // with the needed unconditional jump instruction from Pred to Succ.
556
+ // The cost of the topological order layout is the taken branch cost
557
+ // from BB to Succ, so to make BB->Succ a viable candidate, the following
558
+ // must hold:
559
+ // 2 * freq(BB->Pred) * taken_branch_cost + unconditional_jump_cost
560
+ // < freq(BB->Succ) * taken_branch_cost.
561
+ // Ignoring unconditional jump cost, we get
562
+ // freq(BB->Succ) > 2 * freq(BB->Pred), i.e.,
563
+ // prob(BB->Succ) > 2 * prob(BB->Pred)
564
+ //
565
+ // When real profile data is available, we can precisely compute the
566
+ // probability threshold that is needed for edge BB->Succ to be considered.
567
+ // Without profile data, the heuristic requires the branch bias to be
568
+ // a lot larger to make sure the signal is very strong (e.g. 80% default).
569
+ // -----------------------------------------------------------------
570
+ // Case 2: diamond like CFG:
571
+ // S
572
+ // / \
573
+ // | \
574
+ // BB Pred
575
+ // \ /
576
+ // Succ
577
+ // ..
578
+ // In this case, edge S->BB has already been selected, and we are evaluating
579
+ // candidate edge BB->Succ. Edge S->BB is selected because prob(S->BB)
580
+ // is no less than prob(S->Pred). When real profile data is *available*, if
581
+ // the condition is true, it will be always better to continue the trace with
582
+ // edge BB->Succ instead of laying out with topological order (i.e. laying
583
+ // Pred first). The cost of S->BB->Succ is 2 * freq (S->Pred), while with
584
+ // the topo order, the cost is freq(S->Pred) + freq(S->BB) which is larger.
585
+ // When profile data is not available, however, we need to be more
586
+ // conservative. If the branch prediction is wrong, breaking the topo-order
587
+ // will actually yield a layout with large cost. For this reason, we need
588
+ // a strongly biased branch at block S with Prob(S->BB) in order to select
589
+ // BB->Succ. This is equivalent to looking at the CFG backward with backward
590
+ // edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected (without
591
+ // profile data).
592
+
593
+ BranchProbability HotProb = getLayoutSuccessorProbThreshold ();
594
+
595
+ // Forward checking. For case 2, SuccProb will be 1.
596
+ if (SuccProb < HotProb) {
597
+ DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
598
+ << " (prob) (CFG conflict)\n " );
599
+ return true ;
600
+ }
601
+
602
+ // Make sure that a hot successor doesn't have a globally more
603
+ // important predecessor.
604
+ BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq (BB) * RealSuccProb;
605
+ bool BadCFGConflict = false ;
606
+
607
+ for (MachineBasicBlock *Pred : Succ->predecessors ()) {
608
+ if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
609
+ (BlockFilter && !BlockFilter->count (Pred)) ||
610
+ BlockToChain[Pred] == &Chain)
611
+ continue ;
612
+ // Do backward checking. For case 1, it is actually a redundant check. For
613
+ // case 2 above, we need backward checking to filter out edges that are
614
+ // not 'strongly' biased. With profile data available, the check is mostly
615
+ // redundant too (when threshold prob is set at 50%) unless S has more than
616
+ // two successors.
617
+ // BB Pred
618
+ // \ /
619
+ // Succ
620
+ // We select edge BB->Succ if
621
+ // freq(BB->Succ) > freq(Succ) * HotProb
622
+ // i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
623
+ // HotProb
624
+ // i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
625
+ BlockFrequency PredEdgeFreq =
626
+ MBFI->getBlockFreq (Pred) * MBPI->getEdgeProbability (Pred, Succ);
627
+ if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl ()) {
628
+ BadCFGConflict = true ;
629
+ break ;
630
+ }
631
+ }
632
+
633
+ if (BadCFGConflict) {
634
+ DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
635
+ << " (prob) (non-cold CFG conflict)\n " );
636
+ return true ;
637
+ }
638
+
639
+ return false ;
640
+ }
641
+
515
642
/// \brief Select the best successor for a block.
516
643
///
517
644
/// This looks across all successors of a particular block and attempts to
@@ -545,51 +672,18 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
545
672
HotProb))
546
673
return Succ;
547
674
548
- // Only consider successors which are either "hot", or wouldn't violate
549
- // any CFG constraints.
550
675
BlockChain &SuccChain = *BlockToChain[Succ];
551
- if (SuccChain.UnscheduledPredecessors != 0 ) {
552
- if (SuccProb < HotProb) {
553
- DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
554
- << " (prob) (CFG conflict)\n " );
555
- continue ;
556
- }
557
-
558
- // Make sure that a hot successor doesn't have a globally more
559
- // important predecessor.
560
- BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq (BB) * RealSuccProb;
561
- bool BadCFGConflict = false ;
562
- for (MachineBasicBlock *Pred : Succ->predecessors ()) {
563
- if (Pred == Succ || BlockToChain[Pred] == &SuccChain ||
564
- (BlockFilter && !BlockFilter->count (Pred)) ||
565
- BlockToChain[Pred] == &Chain)
566
- continue ;
567
- BlockFrequency PredEdgeFreq =
568
- MBFI->getBlockFreq (Pred) * MBPI->getEdgeProbability (Pred, Succ);
569
- // A B
570
- // \ /
571
- // C
572
- // We layout ACB iff A.freq > C.freq * HotProb
573
- // i.e. A.freq > A.freq * HotProb + B.freq * HotProb
574
- // i.e. A.freq * (1 - HotProb) > B.freq * HotProb
575
- // A: CandidateEdge
576
- // B: PredEdge
577
- if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl ()) {
578
- BadCFGConflict = true ;
579
- break ;
580
- }
581
- }
582
- if (BadCFGConflict) {
583
- DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
584
- << " (prob) (non-cold CFG conflict)\n " );
585
- continue ;
586
- }
587
- }
676
+ // Skip the edge \c BB->Succ if block \c Succ has a better layout
677
+ // predecessor that yields lower global cost.
678
+ if (hasBetterLayoutPredecessor (BB, Succ, SuccChain, SuccProb, RealSuccProb,
679
+ Chain, BlockFilter))
680
+ continue ;
588
681
589
- DEBUG (dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
590
- << " (prob)"
591
- << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : " " )
592
- << " \n " );
682
+ DEBUG (
683
+ dbgs () << " " << getBlockName (Succ) << " -> " << SuccProb
684
+ << " (prob)"
685
+ << (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : " " )
686
+ << " \n " );
593
687
if (BestSucc && BestProb >= SuccProb)
594
688
continue ;
595
689
BestSucc = Succ;
0 commit comments