@@ -38,6 +38,14 @@ static cl::opt<bool>
38
38
cl::desc (" Disable unclustred high register pressure "
39
39
" reduction scheduling stage." ),
40
40
cl::init(false ));
41
// Bias added to the pre-stage schedule metric when deciding whether to keep
// a rescheduled region (see UnclusteredHighRPStage::shouldRevertScheduling):
// larger values weight the comparison toward the occupancy gain over the
// latency-bubble metric.  Per the option description, 100 chases occupancy
// only.
static cl::opt<unsigned> ScheduleMetricBias(
    "amdgpu-schedule-metric-bias", cl::Hidden,
    cl::desc(
        "Sets the bias which adds weight to occupancy vs latency. Set it to "
        "100 to chase the occupancy only."),
    cl::init(10));

// Fixed-point scale for the schedule metric so it can be computed in integer
// arithmetic (metric = bubbles * ScaleFactor / total cycles).
const unsigned ScheduleMetrics::ScaleFactor = 100;
41
49
42
50
GCNSchedStrategy::GCNSchedStrategy (const MachineSchedContext *C)
43
51
: GenericScheduler(C), TargetOccupancy(0 ), MF(nullptr ),
@@ -862,6 +870,7 @@ void GCNSchedStage::checkScheduling() {
862
870
// Check the results of scheduling.
863
871
PressureAfter = DAG.getRealRegPressure (RegionIdx);
864
872
LLVM_DEBUG (dbgs () << " Pressure after scheduling: " << print (PressureAfter));
873
+ LLVM_DEBUG (dbgs () << " Region: " << RegionIdx << " .\n " );
865
874
866
875
if (PressureAfter.getSGPRNum () <= S.SGPRCriticalLimit &&
867
876
PressureAfter.getVGPRNum (ST.hasGFX90AInsts ()) <= S.VGPRCriticalLimit ) {
@@ -925,6 +934,120 @@ void GCNSchedStage::checkScheduling() {
925
934
}
926
935
}
927
936
937
+ unsigned
938
+ GCNSchedStage::computeSUnitReadyCycle (const SUnit &SU, unsigned CurrCycle,
939
+ DenseMap<unsigned , unsigned > &ReadyCycles,
940
+ const TargetSchedModel &SM) {
941
+ unsigned ReadyCycle = CurrCycle;
942
+ for (auto &D : SU.Preds ) {
943
+ if (D.isAssignedRegDep ()) {
944
+ MachineInstr *DefMI = D.getSUnit ()->getInstr ();
945
+ unsigned Latency = SM.computeInstrLatency (DefMI);
946
+ unsigned DefReady = ReadyCycles[DAG.getSUnit (DefMI)->NodeNum ];
947
+ ReadyCycle = std::max (ReadyCycle, DefReady + Latency);
948
+ }
949
+ }
950
+ ReadyCycles[SU.NodeNum ] = ReadyCycle;
951
+ return ReadyCycle;
952
+ }
953
+
954
#ifndef NDEBUG
// Orders (instruction, ready-cycle) pairs by ascending issue cycle so the
// debug dump below prints the modeled schedule in time order.
struct EarlierIssuingCycle {
  bool operator()(std::pair<MachineInstr *, unsigned> A,
                  std::pair<MachineInstr *, unsigned> B) const {
    return A.second < B.second;
  }
};

// Dump each instruction's modeled issue cycle for the block, flagging any gap
// of more than one cycle between consecutive instructions as a "bubble"
// (a stall implied by unsatisfied producer latency).
static void printScheduleModel(std::set<std::pair<MachineInstr *, unsigned>,
                                        EarlierIssuingCycle> &ReadyCycles) {
  if (ReadyCycles.empty())
    return;
  // All entries come from one region, so the first instruction's parent
  // identifies the MBB being reported.
  unsigned BBNum = ReadyCycles.begin()->first->getParent()->getNumber();
  dbgs() << "\n################## Schedule time ReadyCycles for MBB : " << BBNum
         << " ##################\n# Cycle #\t\t\tInstruction          "
            "             "
            " \n";
  unsigned IPrev = 1;
  for (auto &I : ReadyCycles) {
    if (I.second > IPrev + 1)
      dbgs() << "****************************** BUBBLE OF " << I.second - IPrev
             << " CYCLES DETECTED ******************************\n\n";
    dbgs() << "[ " << I.second << " ]  : " << *I.first << "\n";
    IPrev = I.second;
  }
}
#endif
981
+
982
+ ScheduleMetrics
983
+ GCNSchedStage::getScheduleMetrics (const std::vector<SUnit> &InputSchedule) {
984
+ #ifndef NDEBUG
985
+ std::set<std::pair<MachineInstr *, unsigned >, EarlierIssuingCycle>
986
+ ReadyCyclesSorted;
987
+ #endif
988
+ const TargetSchedModel &SM = ST.getInstrInfo ()->getSchedModel ();
989
+ unsigned SumBubbles = 0 ;
990
+ DenseMap<unsigned , unsigned > ReadyCycles;
991
+ unsigned CurrCycle = 0 ;
992
+ for (auto &SU : InputSchedule) {
993
+ unsigned ReadyCycle =
994
+ computeSUnitReadyCycle (SU, CurrCycle, ReadyCycles, SM);
995
+ SumBubbles += ReadyCycle - CurrCycle;
996
+ #ifndef NDEBUG
997
+ ReadyCyclesSorted.insert (std::make_pair (SU.getInstr (), ReadyCycle));
998
+ #endif
999
+ CurrCycle = ++ReadyCycle;
1000
+ }
1001
+ #ifndef NDEBUG
1002
+ LLVM_DEBUG (
1003
+ printScheduleModel (ReadyCyclesSorted);
1004
+ dbgs () << " \n\t "
1005
+ << " Metric: "
1006
+ << (SumBubbles
1007
+ ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1008
+ : 1 )
1009
+ << " \n\n " );
1010
+ #endif
1011
+
1012
+ return ScheduleMetrics (CurrCycle, SumBubbles);
1013
+ }
1014
+
1015
+ ScheduleMetrics
1016
+ GCNSchedStage::getScheduleMetrics (const GCNScheduleDAGMILive &DAG) {
1017
+ #ifndef NDEBUG
1018
+ std::set<std::pair<MachineInstr *, unsigned >, EarlierIssuingCycle>
1019
+ ReadyCyclesSorted;
1020
+ #endif
1021
+ const TargetSchedModel &SM = ST.getInstrInfo ()->getSchedModel ();
1022
+ unsigned SumBubbles = 0 ;
1023
+ DenseMap<unsigned , unsigned > ReadyCycles;
1024
+ unsigned CurrCycle = 0 ;
1025
+ for (auto &MI : DAG) {
1026
+ SUnit *SU = DAG.getSUnit (&MI);
1027
+ if (!SU)
1028
+ continue ;
1029
+ unsigned ReadyCycle =
1030
+ computeSUnitReadyCycle (*SU, CurrCycle, ReadyCycles, SM);
1031
+ SumBubbles += ReadyCycle - CurrCycle;
1032
+ #ifndef NDEBUG
1033
+ ReadyCyclesSorted.insert (std::make_pair (SU->getInstr (), ReadyCycle));
1034
+ #endif
1035
+ CurrCycle = ++ReadyCycle;
1036
+ }
1037
+ #ifndef NDEBUG
1038
+ LLVM_DEBUG (
1039
+ printScheduleModel (ReadyCyclesSorted);
1040
+ dbgs () << " \n\t "
1041
+ << " Metric: "
1042
+ << (SumBubbles
1043
+ ? (SumBubbles * ScheduleMetrics::ScaleFactor) / CurrCycle
1044
+ : 1 )
1045
+ << " \n\n " );
1046
+ #endif
1047
+
1048
+ return ScheduleMetrics (CurrCycle, SumBubbles);
1049
+ }
1050
+
928
1051
bool GCNSchedStage::shouldRevertScheduling (unsigned WavesAfter) {
929
1052
if (WavesAfter < DAG.MinOccupancy )
930
1053
return true ;
@@ -955,7 +1078,28 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
955
1078
return true ;
956
1079
}
957
1080
958
- return false ;
1081
+ LLVM_DEBUG (
1082
+ dbgs ()
1083
+ << " \n\t *** In shouldRevertScheduling ***\n "
1084
+ << " *********** BEFORE UnclusteredHighRPStage ***********\n " );
1085
+ ScheduleMetrics MBefore =
1086
+ getScheduleMetrics (DAG.SUnits );
1087
+ LLVM_DEBUG (
1088
+ dbgs ()
1089
+ << " \n *********** AFTER UnclusteredHighRPStage ***********\n " );
1090
+ ScheduleMetrics MAfter = getScheduleMetrics (DAG);
1091
+ unsigned OldMetric = MBefore.getMetric ();
1092
+ unsigned NewMetric = MAfter.getMetric ();
1093
+ unsigned WavesBefore =
1094
+ std::min (S.getTargetOccupancy (), PressureBefore.getOccupancy (ST));
1095
+ unsigned Profit =
1096
+ ((WavesAfter * ScheduleMetrics::ScaleFactor) / WavesBefore *
1097
+ ((OldMetric + ScheduleMetricBias) * ScheduleMetrics::ScaleFactor) /
1098
+ NewMetric) /
1099
+ ScheduleMetrics::ScaleFactor;
1100
+ LLVM_DEBUG (dbgs () << " \t Metric before " << MBefore << " \t Metric after "
1101
+ << MAfter << " Profit: " << Profit << " \n " );
1102
+ return Profit < ScheduleMetrics::ScaleFactor;
959
1103
}
960
1104
961
1105
bool ClusteredLowOccStage::shouldRevertScheduling (unsigned WavesAfter) {
0 commit comments