Skip to content

Commit 0c2c8e1

Browse files
committed
!fixup address comments.
1 parent 73260a8 commit 0c2c8e1

File tree

1 file changed

+46
-55
lines changed

1 file changed

+46
-55
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 46 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -4872,31 +4872,14 @@ void LoopVectorizationCostModel::collectElementTypesForWidening() {
48724872
}
48734873
}
48744874

4875-
/// Estimate the register usage for \p Plan and vectorization factors in \p VFs.
4876-
/// Returns the register usage for each VF in \p VFs.
4875+
/// Estimate the register usage for \p Plan and vectorization factors in \p VFs
4876+
/// by calculating the highest number of values that are live at a single
4877+
/// location as a rough estimate. Returns the register usage for each VF in \p
4878+
/// VFs.
48774879
static SmallVector<LoopVectorizationCostModel::RegisterUsage, 8>
48784880
calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
48794881
const TargetTransformInfo &TTI,
48804882
const SmallPtrSetImpl<const Value *> &ValuesToIgnore) {
4881-
// This function calculates the register usage by measuring the highest number
4882-
// of values that are alive at a single location. Obviously, this is a very
4883-
// rough estimation. We scan the loop in a topological order in order and
4884-
// assign a number to each recipe. We use RPO to ensure that defs are
4885-
// met before their users. We assume that each recipe that has in-loop
4886-
// users starts an interval. We record every time that an in-loop value is
4887-
// used, so we have a list of the first and last occurrences of each
4888-
// recipe. Next, we transpose this data structure into a multi map that
4889-
// holds the list of intervals that *end* at a specific location. This multi
4890-
// map allows us to perform a linear search. We scan the instructions linearly
4891-
// and record each time that a new interval starts, by placing it in a set.
4892-
// If we find this value in the multi-map then we remove it from the set.
4893-
// The max register usage is the maximum size of the set.
4894-
// We also search for instructions that are defined outside the loop, but are
4895-
// used inside the loop. We need this number separately from the max-interval
4896-
// usage number because when we unroll, loop-invariant values do not take
4897-
// more register.
4898-
LoopVectorizationCostModel::RegisterUsage RU;
4899-
49004883
// Each 'key' in the map opens a new interval. The values
49014884
// of the map are the index of the 'last seen' usage of the
49024885
// recipe that is the key.
@@ -4914,6 +4897,11 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
49144897
SmallSetVector<VPValue *, 8> LoopInvariants;
49154898
LoopInvariants.insert(&Plan.getVectorTripCount());
49164899

4900+
// We scan the loop in topological order and assign a number to
4901+
// each recipe. We use RPO to ensure that defs are met before their users. We
4902+
// assume that each recipe that has in-loop users starts an interval. We
4903+
// record every time that an in-loop value is used, so we have a list of the
4904+
// first and last occurrences of each recipe.
49174905
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
49184906
Plan.getVectorLoopRegion());
49194907
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
@@ -4961,7 +4949,8 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
49614949
using RecipeList = SmallVector<VPRecipeBase *, 2>;
49624950
SmallDenseMap<unsigned, RecipeList, 16> TransposeEnds;
49634951

4964-
// Transpose the EndPoints to a list of values that end at each index.
4952+
// Next, we transpose the EndPoints into a multi map that holds the list of
4953+
// intervals that *end* at a specific location.
49654954
for (auto &Interval : EndPoint)
49664955
TransposeEnds[Interval.second].push_back(Interval.first);
49674956

@@ -4982,10 +4971,14 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
49824971
return TTICapture.getRegUsageForType(VectorType::get(Ty, VF));
49834972
};
49844973

4974+
// We scan the instructions linearly and record each time that a new interval
4975+
// starts, by placing it in a set. If we find this value in TransposeEnds then
4976+
// we remove it from the set. The max register usage is the maximum register
4977+
// usage of the recipes of the set.
49854978
for (unsigned int Idx = 0, Sz = Idx2Recipe.size(); Idx < Sz; ++Idx) {
49864979
VPRecipeBase *R = Idx2Recipe[Idx];
49874980

4988-
// Remove all of the recipes that end at this location.
4981+
// Remove all of the recipes that end at this location.
49894982
RecipeList &List = TransposeEnds[Idx];
49904983
for (VPRecipeBase *ToRemove : List)
49914984
OpenIntervals.erase(ToRemove);
@@ -5012,38 +5005,31 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
50125005
// there is no previous entry for ClassID.
50135006
SmallMapVector<unsigned, unsigned, 4> RegUsage;
50145007

5015-
if (VFs[J].isScalar()) {
5016-
for (auto *Inst : OpenIntervals) {
5017-
for (VPValue *DefV : Inst->definedValues()) {
5018-
unsigned ClassID = TTI.getRegisterClassForType(
5019-
false, TypeInfo.inferScalarType(DefV));
5020-
// FIXME: The target might use more than one register for the type
5021-
// even in the scalar case.
5022-
RegUsage[ClassID] += 1;
5023-
}
5024-
}
5025-
} else {
5026-
for (auto *R : OpenIntervals) {
5027-
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe>(R))
5028-
continue;
5029-
if (isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
5030-
VPScalarIVStepsRecipe>(R) ||
5031-
(isa<VPInstruction>(R) &&
5032-
all_of(cast<VPSingleDefRecipe>(R)->users(), [&](VPUser *U) {
5033-
return cast<VPRecipeBase>(U)->usesScalars(
5034-
R->getVPSingleValue());
5035-
}))) {
5036-
unsigned ClassID = TTI.getRegisterClassForType(
5037-
false, TypeInfo.inferScalarType(R->getVPSingleValue()));
5038-
// FIXME: The target might use more than one register for the type
5039-
// even in the scalar case.
5040-
RegUsage[ClassID] += 1;
5041-
} else {
5042-
for (VPValue *DefV : R->definedValues()) {
5043-
Type *ScalarTy = TypeInfo.inferScalarType(DefV);
5044-
unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
5045-
RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]);
5046-
}
5008+
for (auto *R : OpenIntervals) {
5009+
// Skip recipes that weren't present in the original loop.
5010+
// TODO: Remove after removing the legacy
5011+
// LoopVectorizationCostModel::calculateRegisterUsage
5012+
if (isa<VPVectorPointerRecipe, VPVectorEndPointerRecipe,
5013+
VPBranchOnMaskRecipe>(R))
5014+
continue;
5015+
5016+
if (VFs[J].isScalar() ||
5017+
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
5018+
VPScalarIVStepsRecipe>(R) ||
5019+
(isa<VPInstruction>(R) &&
5020+
all_of(cast<VPSingleDefRecipe>(R)->users(), [&](VPUser *U) {
5021+
return cast<VPRecipeBase>(U)->usesScalars(R->getVPSingleValue());
5022+
}))) {
5023+
unsigned ClassID = TTI.getRegisterClassForType(
5024+
false, TypeInfo.inferScalarType(R->getVPSingleValue()));
5025+
// FIXME: The target might use more than one register for the type
5026+
// even in the scalar case.
5027+
RegUsage[ClassID] += 1;
5028+
} else {
5029+
for (VPValue *DefV : R->definedValues()) {
5030+
Type *ScalarTy = TypeInfo.inferScalarType(DefV);
5031+
unsigned ClassID = TTI.getRegisterClassForType(true, ScalarTy);
5032+
RegUsage[ClassID] += GetRegUsage(ScalarTy, VFs[J]);
50475033
}
50485034
}
50495035
}
@@ -5061,6 +5047,11 @@ calculateRegisterUsage(VPlan &Plan, ArrayRef<ElementCount> VFs,
50615047
OpenIntervals.insert(R);
50625048
}
50635049

5050+
// We also search for instructions that are defined outside the loop, but are
5051+
// used inside the loop. We need this number separately from the max-interval
5052+
// usage number because when we unroll, loop-invariant values do not take
5053+
// more register.
5054+
LoopVectorizationCostModel::RegisterUsage RU;
50645055
for (unsigned Idx = 0, End = VFs.size(); Idx < End; ++Idx) {
50655056
// Note that elements in this SmallMapVector will be default constructed
50665057
// as 0. So we can use "Invariant[ClassID] += n" in the code below even if

0 commit comments

Comments
 (0)