@@ -8304,35 +8304,57 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8304
8304
});
8305
8305
// FIXME: this must be moved to TTI for better estimation.
8306
8306
unsigned EltsPerVector = getPartNumElems(VL.size(), NumParts);
8307
- auto CheckPerRegistersShuffle =
8308
- [&](MutableArrayRef<int> Mask,
8309
- SmallVector<int> Indices) -> std::optional<TTI::ShuffleKind> {
8307
+ auto CheckPerRegistersShuffle = [&](MutableArrayRef<int> Mask,
8308
+ SmallVectorImpl<unsigned> &Indices)
8309
+ -> std::optional<TTI::ShuffleKind> {
8310
8310
if (NumElts <= EltsPerVector)
8311
8311
return std::nullopt;
8312
+ int OffsetReg0 =
8313
+ alignDown(std::accumulate(Mask.begin(), Mask.end(), INT_MAX,
8314
+ [](int S, int I) {
8315
+ if (I == PoisonMaskElem)
8316
+ return S;
8317
+ return std::min(S, I);
8318
+ }),
8319
+ EltsPerVector);
8320
+ int OffsetReg1 = OffsetReg0;
8312
8321
DenseSet<int> RegIndices;
8313
8322
// Check whether we are trying to permute the same single/2 input vectors.
8314
8323
TTI::ShuffleKind ShuffleKind = TTI::SK_PermuteSingleSrc;
8315
8324
int FirstRegId = -1;
8316
- Indices.assign(1, -1 );
8317
- for (int &I : Mask) {
8325
+ Indices.assign(1, OffsetReg0 );
8326
+ for (auto [Pos, I] : enumerate( Mask) ) {
8318
8327
if (I == PoisonMaskElem)
8319
8328
continue;
8320
- int RegId = (I / NumElts) * NumParts + (I % NumElts) / EltsPerVector;
8329
+ int Idx = I - OffsetReg0;
8330
+ int RegId =
8331
+ (Idx / NumElts) * NumParts + (Idx % NumElts) / EltsPerVector;
8321
8332
if (FirstRegId < 0)
8322
8333
FirstRegId = RegId;
8323
8334
RegIndices.insert(RegId);
8324
8335
if (RegIndices.size() > 2)
8325
8336
return std::nullopt;
8326
8337
if (RegIndices.size() == 2) {
8327
8338
ShuffleKind = TTI::SK_PermuteTwoSrc;
8328
- if (Indices.size() == 1)
8329
- Indices.push_back(-1);
8339
+ if (Indices.size() == 1) {
8340
+ OffsetReg1 = alignDown(
8341
+ std::accumulate(
8342
+ std::next(Mask.begin(), Pos), Mask.end(), INT_MAX,
8343
+ [&](int S, int I) {
8344
+ if (I == PoisonMaskElem)
8345
+ return S;
8346
+ int RegId = ((I - OffsetReg0) / NumElts) * NumParts +
8347
+ ((I - OffsetReg0) % NumElts) / EltsPerVector;
8348
+ if (RegId == FirstRegId)
8349
+ return S;
8350
+ return std::min(S, I);
8351
+ }),
8352
+ EltsPerVector);
8353
+ Indices.push_back(OffsetReg1);
8354
+ }
8355
+ Idx = I - OffsetReg1;
8330
8356
}
8331
- if (RegId == FirstRegId)
8332
- Indices.front() = I % NumElts;
8333
- else
8334
- Indices.back() = I % NumElts;
8335
- I = (I % NumElts) % EltsPerVector +
8357
+ I = (Idx % NumElts) % EltsPerVector +
8336
8358
(RegId == FirstRegId ? 0 : EltsPerVector);
8337
8359
}
8338
8360
return ShuffleKind;
@@ -8349,7 +8371,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8349
8371
Part * EltsPerVector, getNumElems(Mask.size(), EltsPerVector, Part));
8350
8372
SmallVector<int> SubMask(EltsPerVector, PoisonMaskElem);
8351
8373
copy(MaskSlice, SubMask.begin());
8352
- SmallVector<int > Indices;
8374
+ SmallVector<unsigned, 2 > Indices;
8353
8375
std::optional<TTI::ShuffleKind> RegShuffleKind =
8354
8376
CheckPerRegistersShuffle(SubMask, Indices);
8355
8377
if (!RegShuffleKind) {
@@ -8367,12 +8389,21 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
8367
8389
FixedVectorType::get(ScalarTy, EltsPerVector),
8368
8390
SubMask);
8369
8391
}
8370
- for (int Idx : Indices) {
8392
+ for (unsigned Idx : Indices) {
8371
8393
Cost += ::getShuffleCost(TTI, TTI::SK_ExtractSubvector,
8372
8394
FixedVectorType::get(ScalarTy, NumElts),
8373
8395
std::nullopt, CostKind, Idx,
8374
8396
FixedVectorType::get(ScalarTy, EltsPerVector));
8375
8397
}
8398
+ // Second attempt: check whether a plain permute is estimated to be cheaper
8399
+ // than the subvector extracts.
8400
+ SubMask.assign(NumElts, PoisonMaskElem);
8401
+ copy(MaskSlice, SubMask.begin());
8402
+ InstructionCost OriginalCost =
8403
+ ::getShuffleCost(TTI, *ShuffleKinds[Part],
8404
+ FixedVectorType::get(ScalarTy, NumElts), SubMask);
8405
+ if (OriginalCost < Cost)
8406
+ Cost = OriginalCost;
8376
8407
}
8377
8408
return Cost;
8378
8409
}
0 commit comments