Skip to content

Commit 7841520

Browse files
[SLP]Fix the cost of the adjusted extracts in per-register analysis.
The previous patch passed the list of extract indices by value instead of by reference, so the indices computed by the per-register check were discarded on return and never reached the caller. Pass the indices by reference and fix the per-register analysis. Reviewers: RKSimon. Reviewed By: RKSimon. Pull Request: #96808
1 parent 4e2e485 commit 7841520

File tree

4 files changed

+160
-101
lines changed

4 files changed

+160
-101
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8304,35 +8304,57 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
83048304
});
83058305
// FIXME: this must be moved to TTI for better estimation.
83068306
unsigned EltsPerVector = getPartNumElems(VL.size(), NumParts);
8307-
auto CheckPerRegistersShuffle =
8308-
[&](MutableArrayRef<int> Mask,
8309-
SmallVector<int> Indices) -> std::optional<TTI::ShuffleKind> {
8307+
auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
8308+
SmallVectorImpl<unsigned> &Indices)
8309+
-> std::optional<TTI::ShuffleKind> {
83108310
if (NumElts <= EltsPerVector)
83118311
return std::nullopt;
8312+
int OffsetReg0 =
8313+
alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
8314+
[](int S, int I) {
8315+
if (I == PoisonMaskElem)
8316+
return S;
8317+
return std::min(S, I);
8318+
}),
8319+
EltsPerVector);
8320+
int OffsetReg1 = OffsetReg0;
83128321
DenseSet<int> RegIndices;
83138322
// Check whether we are trying to permute the same single/2 input vectors.
83148323
TTI::ShuffleKind ShuffleKind = TTI::SK_PermuteSingleSrc;
83158324
int FirstRegId = -1;
8316-
Indices.assign(1, -1);
8317-
for (int &I : Mask) {
8325+
Indices.assign(1, OffsetReg0);
8326+
for (auto [Pos, I] : enumerate(Mask)) {
83188327
if (I == PoisonMaskElem)
83198328
continue;
8320-
int RegId = (I / NumElts) * NumParts + (I % NumElts) / EltsPerVector;
8329+
int Idx = I - OffsetReg0;
8330+
int RegId =
8331+
(Idx / NumElts) * NumParts + (Idx % NumElts) / EltsPerVector;
83218332
if (FirstRegId < 0)
83228333
FirstRegId = RegId;
83238334
RegIndices.insert(RegId);
83248335
if (RegIndices.size() > 2)
83258336
return std::nullopt;
83268337
if (RegIndices.size() == 2) {
83278338
ShuffleKind = TTI::SK_PermuteTwoSrc;
8328-
if (Indices.size() == 1)
8329-
Indices.push_back(-1);
8339+
if (Indices.size() == 1) {
8340+
OffsetReg1 = alignDown(
8341+
std::accumulate(
8342+
std::next(Mask.begin(), Pos), Mask.end(), INT_MAX,
8343+
[&](int S, int I) {
8344+
if (I == PoisonMaskElem)
8345+
return S;
8346+
int RegId = ((I - OffsetReg0) / NumElts) * NumParts +
8347+
((I - OffsetReg0) % NumElts) / EltsPerVector;
8348+
if (RegId == FirstRegId)
8349+
return S;
8350+
return std::min(S, I);
8351+
}),
8352+
EltsPerVector);
8353+
Indices.push_back(OffsetReg1);
8354+
}
8355+
Idx = I - OffsetReg1;
83308356
}
8331-
if (RegId == FirstRegId)
8332-
Indices.front() = I % NumElts;
8333-
else
8334-
Indices.back() = I % NumElts;
8335-
I = (I % NumElts) % EltsPerVector +
8357+
I = (Idx % NumElts) % EltsPerVector +
83368358
(RegId == FirstRegId ? 0 : EltsPerVector);
83378359
}
83388360
return ShuffleKind;
@@ -8349,7 +8371,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
83498371
Part * EltsPerVector, getNumElems(Mask.size(), EltsPerVector, Part));
83508372
SmallVector<int> SubMask(EltsPerVector, PoisonMaskElem);
83518373
copy(MaskSlice, SubMask.begin());
8352-
SmallVector<int> Indices;
8374+
SmallVector<unsigned, 2> Indices;
83538375
std::optional<TTI::ShuffleKind> RegShuffleKind =
83548376
CheckPerRegistersShuffle(SubMask, Indices);
83558377
if (!RegShuffleKind) {
@@ -8367,12 +8389,21 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
83678389
FixedVectorType::get(ScalarTy, EltsPerVector),
83688390
SubMask);
83698391
}
8370-
for (int Idx : Indices) {
8392+
for (unsigned Idx : Indices) {
83718393
Cost += ::getShuffleCost(TTI, TTI::SK_ExtractSubvector,
83728394
FixedVectorType::get(ScalarTy, NumElts),
83738395
std::nullopt, CostKind, Idx,
83748396
FixedVectorType::get(ScalarTy, EltsPerVector));
83758397
}
8398+
// Second attempt to check, if just a permute is better estimated than
8399+
// subvector extract.
8400+
SubMask.assign(NumElts, PoisonMaskElem);
8401+
copy(MaskSlice, SubMask.begin());
8402+
InstructionCost OriginalCost =
8403+
::getShuffleCost(TTI, *ShuffleKinds[Part],
8404+
FixedVectorType::get(ScalarTy, NumElts), SubMask);
8405+
if (OriginalCost < Cost)
8406+
Cost = OriginalCost;
83768407
}
83778408
return Cost;
83788409
}

0 commit comments

Comments
 (0)