Skip to content

Commit 723c45a

Browse files
committed
[CostModel] Add a DstTy to getShuffleCost
A shuffle takes two input vectors and a mask, and produces a new vector of type <MaskElts x SrcEltTy>. Historically it has been assumed that the SrcTy and the DstTy are the same for getShuffleCost, although that assumption has been relaxed in recent years. If the Tp passed to getShuffleCost is the SrcTy, then the DstTy can be calculated from the number of Mask elements and the source element type, but the Mask is not always provided and the Tp is not reliably the SrcTy. This has led to situations, notably in the SLP vectorizer but also in the generic cost routines, where assumptions about how vectors will be legalized are built into the cost model - for example whether they will widen or promote, with the cost modelling assuming they will widen while the default lowering is to promote integer vectors. This patch attempts to start improving that - it originally tried to alter more of the cost model, but that too quickly became too many changes at once, so this patch just plumbs in a DstTy to getShuffleCost so that DstTy and SrcTy can be reliably distinguished. The callers of getShuffleCost have been updated to try to pass a DstTy that is more accurate. Otherwise the patch tries to be a mostly non-functional change, keeping the SrcTy as the primary type used in shuffle cost routines and only using DstTy where it was used in the past (for SK_InsertSubvector, for example). Some asserts have been added that help to check for consistent values when a Mask and a DstTy are provided to getShuffleCost. Some of them took a while to get right, and some calls without a mask might still be incorrect. Hopefully this will provide a useful base on which to build support for more shuffles that alter the vector size.
1 parent 2278f5e commit 723c45a

25 files changed

+441
-346
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,16 +1360,17 @@ class TargetTransformInfo {
13601360
const SmallBitVector &OpcodeMask,
13611361
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
13621362

1363-
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1364-
/// The exact mask may be passed as Mask, or else the array will be empty.
1365-
/// The index and subtype parameters are used by the subvector insertion and
1366-
/// extraction shuffle kinds to show the insert/extract point and the type of
1367-
/// the subvector being inserted/extracted. The operands of the shuffle can be
1368-
/// passed through \p Args, which helps improve the cost estimation in some
1369-
/// cases, like in broadcast loads.
1370-
/// NOTE: For subvector extractions Tp represents the source type.
1363+
/// \return The cost of a shuffle instruction of kind Kind with inputs of type
1364+
/// SrcTy, producing a vector of type DstTy. The exact mask may be passed as
1365+
/// Mask, or else the array will be empty. The index and subtype parameters
1366+
/// are used by the subvector insertion and extraction shuffle kinds to show
1367+
/// the insert/extract point and the type of the subvector being
1368+
/// inserted/extracted. The operands of the shuffle can be passed through \p
1369+
/// Args, which helps improve the cost estimation in some cases, like in
1370+
/// broadcast loads.
13711371
InstructionCost
1372-
getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {},
1372+
getShuffleCost(ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1373+
ArrayRef<int> Mask = {},
13731374
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
13741375
int Index = 0, VectorType *SubTp = nullptr,
13751376
ArrayRef<const Value *> Args = {},

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -711,9 +711,9 @@ class TargetTransformInfoImplBase {
711711
}
712712

713713
virtual InstructionCost
714-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
715-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
716-
ArrayRef<const Value *> Args = {},
714+
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
715+
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
716+
VectorType *SubTp, ArrayRef<const Value *> Args = {},
717717
const Instruction *CxtI = nullptr) const {
718718
return 1;
719719
}
@@ -1545,13 +1545,14 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
15451545
return 0;
15461546

15471547
if (Shuffle->isExtractSubvectorMask(SubIndex))
1548-
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1549-
Mask, CostKind, SubIndex, VecTy,
1550-
Operands, Shuffle);
1548+
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
1549+
VecSrcTy, Mask, CostKind, SubIndex,
1550+
VecTy, Operands, Shuffle);
15511551

15521552
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
15531553
return TargetTTI->getShuffleCost(
1554-
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1554+
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
1555+
SubIndex,
15551556
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
15561557
Operands, Shuffle);
15571558

@@ -1580,62 +1581,69 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
15801581

15811582
return TargetTTI->getShuffleCost(
15821583
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1583-
AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1584+
VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
15841585
}
15851586

15861587
// Narrowing shuffle - perform shuffle at original wider width and
15871588
// then extract the lower elements.
1589+
// FIXME: This can assume widening, which is not true of all vector
1590+
// architectures (and is not even the default).
15881591
AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
15891592

15901593
InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
15911594
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1592-
VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1595+
VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
1596+
Shuffle);
15931597

15941598
SmallVector<int, 16> ExtractMask(Mask.size());
15951599
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
15961600
return ShuffleCost + TargetTTI->getShuffleCost(
1597-
TTI::SK_ExtractSubvector, VecSrcTy,
1601+
TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
15981602
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
15991603
}
16001604

16011605
if (Shuffle->isIdentity())
16021606
return 0;
16031607

16041608
if (Shuffle->isReverse())
1605-
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1606-
0, nullptr, Operands, Shuffle);
1609+
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
1610+
CostKind, 0, nullptr, Operands,
1611+
Shuffle);
16071612

16081613
if (Shuffle->isSelect())
1609-
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1610-
0, nullptr, Operands, Shuffle);
1614+
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
1615+
CostKind, 0, nullptr, Operands,
1616+
Shuffle);
16111617

16121618
if (Shuffle->isTranspose())
1613-
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1614-
CostKind, 0, nullptr, Operands,
1619+
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
1620+
Mask, CostKind, 0, nullptr, Operands,
16151621
Shuffle);
16161622

16171623
if (Shuffle->isZeroEltSplat())
1618-
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1619-
CostKind, 0, nullptr, Operands,
1624+
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
1625+
Mask, CostKind, 0, nullptr, Operands,
16201626
Shuffle);
16211627

16221628
if (Shuffle->isSingleSource())
1623-
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1624-
CostKind, 0, nullptr, Operands,
1625-
Shuffle);
1629+
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1630+
VecSrcTy, Mask, CostKind, 0, nullptr,
1631+
Operands, Shuffle);
16261632

16271633
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
16281634
return TargetTTI->getShuffleCost(
1629-
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1635+
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
16301636
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
16311637
Shuffle);
16321638

16331639
if (Shuffle->isSplice(SubIndex))
1634-
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1635-
SubIndex, nullptr, Operands, Shuffle);
1640+
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
1641+
CostKind, SubIndex, nullptr, Operands,
1642+
Shuffle);
16361643

1637-
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1638-
CostKind, 0, nullptr, Operands, Shuffle);
1644+
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
1645+
Mask, CostKind, 0, nullptr, Operands,
1646+
Shuffle);
16391647
}
16401648
case Instruction::ExtractElement: {
16411649
auto *EEI = dyn_cast<ExtractElementInst>(U);

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -329,11 +329,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
329329
// Cost the call + mask.
330330
auto Cost =
331331
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
332-
if (VD->isMasked())
333-
Cost += thisT()->getShuffleCost(
334-
TargetTransformInfo::SK_Broadcast,
335-
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
336-
nullptr, {});
332+
if (VD->isMasked()) {
333+
auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
334+
Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
335+
VecTy, {}, CostKind, 0, nullptr, {});
336+
}
337337

338338
// Lowering to a library call (with output pointers) may require us to emit
339339
// reloads for the results.
@@ -1101,11 +1101,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11011101

11021102
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
11031103
ArrayRef<int> Mask,
1104-
VectorType *Ty, int &Index,
1104+
VectorType *SrcTy, int &Index,
11051105
VectorType *&SubTy) const {
11061106
if (Mask.empty())
11071107
return Kind;
1108-
int NumSrcElts = Ty->getElementCount().getKnownMinValue();
1108+
int NumSrcElts = SrcTy->getElementCount().getKnownMinValue();
11091109
switch (Kind) {
11101110
case TTI::SK_PermuteSingleSrc: {
11111111
if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
@@ -1116,7 +1116,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11161116
return TTI::SK_Broadcast;
11171117
if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
11181118
(Index + Mask.size()) <= (size_t)NumSrcElts) {
1119-
SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
1119+
SubTy = FixedVectorType::get(SrcTy->getElementType(), Mask.size());
11201120
return TTI::SK_ExtractSubvector;
11211121
}
11221122
break;
@@ -1127,7 +1127,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11271127
Mask, NumSrcElts, NumSubElts, Index)) {
11281128
if (Index + NumSubElts > NumSrcElts)
11291129
return Kind;
1130-
SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts);
1130+
SubTy = FixedVectorType::get(SrcTy->getElementType(), NumSubElts);
11311131
return TTI::SK_InsertSubvector;
11321132
}
11331133
if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts))
@@ -1151,13 +1151,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11511151
}
11521152

11531153
InstructionCost
1154-
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
1155-
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
1156-
ArrayRef<const Value *> Args = {},
1154+
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
1155+
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
1156+
VectorType *SubTp, ArrayRef<const Value *> Args = {},
11571157
const Instruction *CxtI = nullptr) const override {
1158-
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
1158+
switch (improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp)) {
11591159
case TTI::SK_Broadcast:
1160-
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1160+
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
11611161
return getBroadcastShuffleOverhead(FVT, CostKind);
11621162
return InstructionCost::getInvalid();
11631163
case TTI::SK_Select:
@@ -1166,14 +1166,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
11661166
case TTI::SK_Transpose:
11671167
case TTI::SK_PermuteSingleSrc:
11681168
case TTI::SK_PermuteTwoSrc:
1169-
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
1169+
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
11701170
return getPermuteShuffleOverhead(FVT, CostKind);
11711171
return InstructionCost::getInvalid();
11721172
case TTI::SK_ExtractSubvector:
1173-
return getExtractSubvectorOverhead(Tp, CostKind, Index,
1173+
return getExtractSubvectorOverhead(SrcTy, CostKind, Index,
11741174
cast<FixedVectorType>(SubTp));
11751175
case TTI::SK_InsertSubvector:
1176-
return getInsertSubvectorOverhead(Tp, CostKind, Index,
1176+
return getInsertSubvectorOverhead(DstTy, CostKind, Index,
11771177
cast<FixedVectorType>(SubTp));
11781178
}
11791179
llvm_unreachable("Unknown TTI::ShuffleKind");
@@ -1910,6 +1910,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19101910
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
19111911
unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
19121912
return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1913+
cast<VectorType>(RetTy),
19131914
cast<VectorType>(Args[0]->getType()), {},
19141915
CostKind, Index, cast<VectorType>(RetTy));
19151916
}
@@ -1920,17 +1921,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19201921
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
19211922
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
19221923
return thisT()->getShuffleCost(
1923-
TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), {},
1924-
CostKind, Index, cast<VectorType>(Args[1]->getType()));
1924+
TTI::SK_InsertSubvector, cast<VectorType>(RetTy),
1925+
cast<VectorType>(Args[0]->getType()), {}, CostKind, Index,
1926+
cast<VectorType>(Args[1]->getType()));
19251927
}
19261928
case Intrinsic::vector_reverse: {
1927-
return thisT()->getShuffleCost(TTI::SK_Reverse,
1929+
return thisT()->getShuffleCost(TTI::SK_Reverse, cast<VectorType>(RetTy),
19281930
cast<VectorType>(Args[0]->getType()), {},
19291931
CostKind, 0, cast<VectorType>(RetTy));
19301932
}
19311933
case Intrinsic::vector_splice: {
19321934
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1933-
return thisT()->getShuffleCost(TTI::SK_Splice,
1935+
return thisT()->getShuffleCost(TTI::SK_Splice, cast<VectorType>(RetTy),
19341936
cast<VectorType>(Args[0]->getType()), {},
19351937
CostKind, Index, cast<VectorType>(RetTy));
19361938
}
@@ -2376,8 +2378,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23762378
CostKind, 1, nullptr, nullptr);
23772379
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
23782380
CostKind, 0, nullptr, nullptr);
2379-
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
2380-
CostKind, 0, nullptr);
2381+
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, SearchTy,
2382+
std::nullopt, CostKind, 0, nullptr);
23812383
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
23822384
CmpInst::ICMP_EQ, CostKind);
23832385
Cost +=
@@ -2956,8 +2958,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
29562958
while (NumVecElts > MVTLen) {
29572959
NumVecElts /= 2;
29582960
VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2959-
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
2960-
CostKind, NumVecElts, SubTy);
2961+
ShuffleCost += thisT()->getShuffleCost(
2962+
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
29612963
ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
29622964
Ty = SubTy;
29632965
++LongVectorCount;
@@ -2973,7 +2975,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
29732975
// By default reductions need one shuffle per reduction level.
29742976
ShuffleCost +=
29752977
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2976-
{}, CostKind, 0, Ty);
2978+
Ty, {}, CostKind, 0, Ty);
29772979
ArithCost +=
29782980
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
29792981
return ShuffleCost + ArithCost +
@@ -3047,8 +3049,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30473049
NumVecElts /= 2;
30483050
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
30493051

3050-
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
3051-
CostKind, NumVecElts, SubTy);
3052+
ShuffleCost += thisT()->getShuffleCost(
3053+
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
30523054

30533055
IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
30543056
MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
@@ -3064,7 +3066,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30643066
// architecture-dependent length.
30653067
ShuffleCost +=
30663068
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
3067-
{}, CostKind, 0, Ty);
3069+
Ty, {}, CostKind, 0, Ty);
30683070
IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
30693071
MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
30703072
// The last min/max should be in vector registers and we counted it above.

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -980,11 +980,16 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
980980
}
981981

982982
InstructionCost TargetTransformInfo::getShuffleCost(
983-
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
983+
ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef<int> Mask,
984984
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
985985
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
986-
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
987-
Index, SubTp, Args, CxtI);
986+
assert((Mask.empty() || DstTy->isScalableTy() ||
987+
Mask.size() == DstTy->getElementCount().getKnownMinValue()) &&
988+
"Expected the Mask to match the return size if given");
989+
assert(SrcTy->getScalarType() == DstTy->getScalarType() &&
990+
"Expected the same scalar types");
991+
InstructionCost Cost = TTIImpl->getShuffleCost(
992+
Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp, Args, CxtI);
988993
assert(Cost >= 0 && "TTI should not produce negative costs!");
989994
return Cost;
990995
}

0 commit comments

Comments
 (0)