@@ -4872,31 +4872,14 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
4872
4872
}
4873
4873
}
4874
4874
4875
- /// Estimate the register usage for \p Plan and vectorization factors in \p VFs.
4876
- /// Returns the register usage for each VF in \p VFs.
4875
+ /// Estimate the register usage for \p Plan and vectorization factors in \p VFs
4876
+ /// by calculating the highest number of values that are live at a single
4877
+ /// location as a rough estimate. Returns the register usage for each VF in \p
4878
+ /// VFs.
4877
4879
static SmallVector<LoopVectorizationCostModel::RegisterUsage, 8 >
4878
4880
calculateRegisterUsage (VPlan &Plan, ArrayRef<ElementCount> VFs,
4879
4881
const TargetTransformInfo &TTI,
4880
4882
const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
4881
- // This function calculates the register usage by measuring the highest number
4882
- // of values that are alive at a single location. Obviously, this is a very
4883
- // rough estimation. We scan the loop in a topological order in order and
4884
- // assign a number to each recipe. We use RPO to ensure that defs are
4885
- // met before their users. We assume that each recipe that has in-loop
4886
- // users starts an interval. We record every time that an in-loop value is
4887
- // used, so we have a list of the first and last occurrences of each
4888
- // recipe. Next, we transpose this data structure into a multi map that
4889
- // holds the list of intervals that *end* at a specific location. This multi
4890
- // map allows us to perform a linear search. We scan the instructions linearly
4891
- // and record each time that a new interval starts, by placing it in a set.
4892
- // If we find this value in the multi-map then we remove it from the set.
4893
- // The max register usage is the maximum size of the set.
4894
- // We also search for instructions that are defined outside the loop, but are
4895
- // used inside the loop. We need this number separately from the max-interval
4896
- // usage number because when we unroll, loop-invariant values do not take
4897
- // more register.
4898
- LoopVectorizationCostModel::RegisterUsage RU;
4899
-
4900
4883
// Each 'key' in the map opens a new interval. The values
4901
4884
// of the map are the index of the 'last seen' usage of the
4902
4885
// recipe that is the key.
@@ -4914,6 +4897,11 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
4914
4897
SmallSetVector<VPValue *, 8 > LoopInvariants;
4915
4898
LoopInvariants.insert (&Plan.getVectorTripCount ());
4916
4899
4900
+ // We scan the loop in topological order and assign a number to
4901
+ // each recipe. We use RPO to ensure that defs are met before their users. We
4902
+ // assume that each recipe that has in-loop users starts an interval. We
4903
+ // record every time that an in-loop value is used, so we have a list of the
4904
+ // first and last occurrences of each recipe.
4917
4905
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT (
4918
4906
Plan.getVectorLoopRegion ());
4919
4907
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
@@ -4961,7 +4949,8 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
4961
4949
using RecipeList = SmallVector<VPRecipeBase *, 2 >;
4962
4950
SmallDenseMap<unsigned , RecipeList, 16 > TransposeEnds;
4963
4951
4964
- // Transpose the EndPoints to a list of values that end at each index.
4952
+ // Next, we transpose the EndPoints into a multi map that holds the list of
4953
+ // intervals that *end* at a specific location.
4965
4954
for (auto &Interval : EndPoint)
4966
4955
TransposeEnds[Interval.second ].push_back (Interval.first );
4967
4956
@@ -4982,10 +4971,14 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
4982
4971
return TTICapture.getRegUsageForType (VectorType::get (Ty, VF));
4983
4972
};
4984
4973
4974
+ // We scan the instructions linearly and record each time that a new interval
4975
+ // starts, by placing it in a set. If we find this value in TransposeEnds
4976
+ // then we remove it from the set. The max register usage is the maximum
4977
+ // register usage of the recipes of the set.
4985
4978
for (unsigned int Idx = 0 , Sz = Idx2Recipe.size (); Idx < Sz; ++Idx) {
4986
4979
VPRecipeBase *R = Idx2Recipe[Idx];
4987
4980
4988
- // Remove all of the recipes that end at this location.
4981
+ // Remove all of the recipes that end at this location.
4989
4982
RecipeList &List = TransposeEnds[Idx];
4990
4983
for (VPRecipeBase *ToRemove : List)
4991
4984
OpenIntervals.erase (ToRemove);
@@ -5012,38 +5005,31 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
5012
5005
// there is no previous entry for ClassID.
5013
5006
SmallMapVector<unsigned , unsigned , 4 > RegUsage;
5014
5007
5015
- if (VFs[J].isScalar ()) {
5016
- for (auto *Inst : OpenIntervals) {
5017
- for (VPValue *DefV : Inst->definedValues ()) {
5018
- unsigned ClassID = TTI.getRegisterClassForType (
5019
- false , TypeInfo.inferScalarType (DefV));
5020
- // FIXME: The target might use more than one register for the type
5021
- // even in the scalar case.
5022
- RegUsage[ClassID] += 1 ;
5023
- }
5024
- }
5025
- } else {
5026
- for (auto *R : OpenIntervals) {
5027
- if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
5028
- continue ;
5029
- if (isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
5030
- VPScalarIVStepsRecipe>(R) ||
5031
- (isa<VPInstruction>(R) &&
5032
- all_of (cast<VPSingleDefRecipe>(R)->users (), [&](VPUser *U) {
5033
- return cast<VPRecipeBase>(U)->usesScalars (
5034
- R->getVPSingleValue ());
5035
- }))) {
5036
- unsigned ClassID = TTI.getRegisterClassForType (
5037
- false , TypeInfo.inferScalarType (R->getVPSingleValue ()));
5038
- // FIXME: The target might use more than one register for the type
5039
- // even in the scalar case.
5040
- RegUsage[ClassID] += 1 ;
5041
- } else {
5042
- for (VPValue *DefV : R->definedValues ()) {
5043
- Type *ScalarTy = TypeInfo.inferScalarType (DefV);
5044
- unsigned ClassID = TTI.getRegisterClassForType (true , ScalarTy);
5045
- RegUsage[ClassID] += GetRegUsage (ScalarTy, VFs[J]);
5046
- }
5008
+ for (auto *R : OpenIntervals) {
5009
+ // Skip recipes that weren't present in the original loop.
5010
+ // TODO: Remove after removing the legacy
5011
+ // LoopVectorizationCostModel::calculateRegisterUsage
5012
+ if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
5013
+ VPBranchOnMaskRecipe>(R))
5014
+ continue ;
5015
+
5016
+ if (VFs[J].isScalar () ||
5017
+ isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
5018
+ VPScalarIVStepsRecipe>(R) ||
5019
+ (isa<VPInstruction>(R) &&
5020
+ all_of (cast<VPSingleDefRecipe>(R)->users (), [&](VPUser *U) {
5021
+ return cast<VPRecipeBase>(U)->usesScalars (R->getVPSingleValue ());
5022
+ }))) {
5023
+ unsigned ClassID = TTI.getRegisterClassForType (
5024
+ false , TypeInfo.inferScalarType (R->getVPSingleValue ()));
5025
+ // FIXME: The target might use more than one register for the type
5026
+ // even in the scalar case.
5027
+ RegUsage[ClassID] += 1 ;
5028
+ } else {
5029
+ for (VPValue *DefV : R->definedValues ()) {
5030
+ Type *ScalarTy = TypeInfo.inferScalarType (DefV);
5031
+ unsigned ClassID = TTI.getRegisterClassForType (true , ScalarTy);
5032
+ RegUsage[ClassID] += GetRegUsage (ScalarTy, VFs[J]);
5047
5033
}
5048
5034
}
5049
5035
}
@@ -5061,6 +5047,11 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
5061
5047
OpenIntervals.insert (R);
5062
5048
}
5063
5049
5050
+ // We also search for instructions that are defined outside the loop, but are
5051
+ // used inside the loop. We need this number separately from the max-interval
5052
+ // usage number because when we unroll, loop-invariant values do not take
5053
+ // more registers.
5054
+ LoopVectorizationCostModel::RegisterUsage RU;
5064
5055
for (unsigned Idx = 0 , End = VFs.size (); Idx < End; ++Idx) {
5065
5056
// Note that elements in this SmallMapVector will be default constructed
5066
5057
// as 0. So we can use "Invariant[ClassID] += n" in the code below even if
0 commit comments