Skip to content

[CostModel] Add a DstTy to getShuffleCost #141634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1381,16 +1381,16 @@ class TargetTransformInfo {
const SmallBitVector &OpcodeMask,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The exact mask may be passed as Mask, or else the array will be empty.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted. The operands of the shuffle can be
/// passed through \p Args, which helps improve the cost estimation in some
/// cases, like in broadcast loads.
/// NOTE: For subvector extractions Tp represents the source type.
/// \return The cost of a shuffle instruction of kind Kind with inputs of type
/// SrcTy, producing a vector of type DstTy. The exact mask may be passed as
/// Mask, or else the array will be empty. The Index and SubTp parameters
/// are used by the subvector insertion and extraction shuffle kinds to show
/// the insert/extract point and the type of the subvector being
/// inserted/extracted. The operands of the shuffle can be passed through
/// \p Args, which helps improve the cost estimation in some cases, like in
/// broadcast loads.
LLVM_ABI InstructionCost getShuffleCost(
ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = {},
ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
ArrayRef<int> Mask = {},
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, int Index = 0,
VectorType *SubTp = nullptr, ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const;
Expand Down
60 changes: 34 additions & 26 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -711,9 +711,9 @@ class TargetTransformInfoImplBase {
}

virtual InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args = {},
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp, ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const {
return 1;
}
Expand Down Expand Up @@ -1545,13 +1545,14 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
return 0;

if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
Mask, CostKind, SubIndex, VecTy,
Operands, Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
VecSrcTy, Mask, CostKind, SubIndex,
VecTy, Operands, Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
Operands, Shuffle);

Expand Down Expand Up @@ -1580,62 +1581,69 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {

return TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
}

// Narrowing shuffle - perform shuffle at original wider width and
// then extract the lower elements.
// FIXME: This can assume widening, which is not true of all vector
// architectures (and is not even the default).
AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);

InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
Shuffle);

SmallVector<int, 16> ExtractMask(Mask.size());
std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
return ShuffleCost + TargetTTI->getShuffleCost(
TTI::SK_ExtractSubvector, VecSrcTy,
TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
}

if (Shuffle->isIdentity())
return 0;

if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
0, nullptr, Operands, Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
0, nullptr, Operands, Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
CostKind, 0, nullptr, Operands,
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
Mask, CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
CostKind, 0, nullptr, Operands,
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
Mask, CostKind, 0, nullptr, Operands,
Shuffle);

if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands,
Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
VecSrcTy, Mask, CostKind, 0, nullptr,
Operands, Shuffle);

if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
Shuffle);

if (Shuffle->isSplice(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
SubIndex, nullptr, Operands, Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
CostKind, SubIndex, nullptr, Operands,
Shuffle);

return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
CostKind, 0, nullptr, Operands, Shuffle);
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
Mask, CostKind, 0, nullptr, Operands,
Shuffle);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
Expand Down
60 changes: 31 additions & 29 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -329,11 +329,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// Cost the call + mask.
auto Cost =
thisT()->getCallInstrCost(nullptr, RetTy, ICA.getArgTypes(), CostKind);
if (VD->isMasked())
Cost += thisT()->getShuffleCost(
TargetTransformInfo::SK_Broadcast,
VectorType::get(IntegerType::getInt1Ty(Ctx), VF), {}, CostKind, 0,
nullptr, {});
if (VD->isMasked()) {
auto VecTy = VectorType::get(IntegerType::getInt1Ty(Ctx), VF);
Cost += thisT()->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
VecTy, {}, CostKind, 0, nullptr, {});
}

// Lowering to a library call (with output pointers) may require us to emit
// reloads for the results.
Expand Down Expand Up @@ -1101,11 +1101,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {

TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
ArrayRef<int> Mask,
VectorType *Ty, int &Index,
VectorType *SrcTy, int &Index,
VectorType *&SubTy) const {
if (Mask.empty())
return Kind;
int NumSrcElts = Ty->getElementCount().getKnownMinValue();
int NumSrcElts = SrcTy->getElementCount().getKnownMinValue();
switch (Kind) {
case TTI::SK_PermuteSingleSrc: {
if (ShuffleVectorInst::isReverseMask(Mask, NumSrcElts))
Expand All @@ -1116,7 +1116,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return TTI::SK_Broadcast;
if (ShuffleVectorInst::isExtractSubvectorMask(Mask, NumSrcElts, Index) &&
(Index + Mask.size()) <= (size_t)NumSrcElts) {
SubTy = FixedVectorType::get(Ty->getElementType(), Mask.size());
SubTy = FixedVectorType::get(SrcTy->getElementType(), Mask.size());
return TTI::SK_ExtractSubvector;
}
break;
Expand All @@ -1127,7 +1127,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
Mask, NumSrcElts, NumSubElts, Index)) {
if (Index + NumSubElts > NumSrcElts)
return Kind;
SubTy = FixedVectorType::get(Ty->getElementType(), NumSubElts);
SubTy = FixedVectorType::get(SrcTy->getElementType(), NumSubElts);
return TTI::SK_InsertSubvector;
}
if (ShuffleVectorInst::isSelectMask(Mask, NumSrcElts))
Expand All @@ -1151,13 +1151,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}

InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args = {},
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp, ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const override {
switch (improveShuffleKindFromMask(Kind, Mask, Tp, Index, SubTp)) {
switch (improveShuffleKindFromMask(Kind, Mask, SrcTy, Index, SubTp)) {
case TTI::SK_Broadcast:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
return getBroadcastShuffleOverhead(FVT, CostKind);
return InstructionCost::getInvalid();
case TTI::SK_Select:
Expand All @@ -1166,14 +1166,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
case TTI::SK_Transpose:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:
if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
if (auto *FVT = dyn_cast<FixedVectorType>(SrcTy))
return getPermuteShuffleOverhead(FVT, CostKind);
return InstructionCost::getInvalid();
case TTI::SK_ExtractSubvector:
return getExtractSubvectorOverhead(Tp, CostKind, Index,
return getExtractSubvectorOverhead(SrcTy, CostKind, Index,
cast<FixedVectorType>(SubTp));
case TTI::SK_InsertSubvector:
return getInsertSubvectorOverhead(Tp, CostKind, Index,
return getInsertSubvectorOverhead(DstTy, CostKind, Index,
cast<FixedVectorType>(SubTp));
}
llvm_unreachable("Unknown TTI::ShuffleKind");
Expand Down Expand Up @@ -1910,6 +1910,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
cast<VectorType>(RetTy),
cast<VectorType>(Args[0]->getType()), {},
CostKind, Index, cast<VectorType>(RetTy));
}
Expand All @@ -1920,17 +1921,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
return thisT()->getShuffleCost(
TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), {},
CostKind, Index, cast<VectorType>(Args[1]->getType()));
TTI::SK_InsertSubvector, cast<VectorType>(RetTy),
cast<VectorType>(Args[0]->getType()), {}, CostKind, Index,
cast<VectorType>(Args[1]->getType()));
}
case Intrinsic::vector_reverse: {
return thisT()->getShuffleCost(TTI::SK_Reverse,
return thisT()->getShuffleCost(TTI::SK_Reverse, cast<VectorType>(RetTy),
cast<VectorType>(Args[0]->getType()), {},
CostKind, 0, cast<VectorType>(RetTy));
}
case Intrinsic::vector_splice: {
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
return thisT()->getShuffleCost(TTI::SK_Splice,
return thisT()->getShuffleCost(TTI::SK_Splice, cast<VectorType>(RetTy),
cast<VectorType>(Args[0]->getType()), {},
CostKind, Index, cast<VectorType>(RetTy));
}
Expand Down Expand Up @@ -2376,8 +2378,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CostKind, 1, nullptr, nullptr);
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
CostKind, 0, nullptr, nullptr);
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
CostKind, 0, nullptr);
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, SearchTy,
std::nullopt, CostKind, 0, nullptr);
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
CmpInst::ICMP_EQ, CostKind);
Cost +=
Expand Down Expand Up @@ -2961,8 +2963,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
CostKind, NumVecElts, SubTy);
ShuffleCost += thisT()->getShuffleCost(
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);
ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
Ty = SubTy;
++LongVectorCount;
Expand All @@ -2978,7 +2980,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// By default reductions need one shuffle per reduction level.
ShuffleCost +=
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
{}, CostKind, 0, Ty);
Ty, {}, CostKind, 0, Ty);
ArithCost +=
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
Expand Down Expand Up @@ -3052,8 +3054,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
NumVecElts /= 2;
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);

ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, {},
CostKind, NumVecElts, SubTy);
ShuffleCost += thisT()->getShuffleCost(
TTI::SK_ExtractSubvector, SubTy, Ty, {}, CostKind, NumVecElts, SubTy);

IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
Expand All @@ -3069,7 +3071,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// architecture-dependent length.
ShuffleCost +=
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
{}, CostKind, 0, Ty);
Ty, {}, CostKind, 0, Ty);
IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
// The last min/max should be in vector registers and we counted it above.
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -984,11 +984,16 @@ InstructionCost TargetTransformInfo::getAltInstrCost(
}

InstructionCost TargetTransformInfo::getShuffleCost(
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
ArrayRef<const Value *> Args, const Instruction *CxtI) const {
InstructionCost Cost = TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind,
Index, SubTp, Args, CxtI);
assert((Mask.empty() || DstTy->isScalableTy() ||
Mask.size() == DstTy->getElementCount().getKnownMinValue()) &&
"Expected the Mask to match the return size if given");
assert(SrcTy->getScalarType() == DstTy->getScalarType() &&
"Expected the same scalar types");
InstructionCost Cost = TTIImpl->getShuffleCost(
Kind, DstTy, SrcTy, Mask, CostKind, Index, SubTp, Args, CxtI);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
Expand Down
Loading
Loading