Skip to content

Commit fd18547

Browse files
committed
[SLP]Allow masked gathers only if allowed by target.
We need to check whether the target allows/supports masked gathers before trying to estimate their cost; otherwise, we may fail to vectorize some patterns because of an overly pessimistic cost model. Part of D57059. Differential Revision: https://reviews.llvm.org/D101297
1 parent 27b651c commit fd18547

File tree

4 files changed

+333
-603
lines changed

4 files changed

+333
-603
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3921,9 +3921,31 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
39213921
CostKind, VL0);
39223922
} else {
39233923
assert(E->State == TreeEntry::ScatterVectorize && "Unknown EntryState");
3924-
VecLdCost = TTI->getGatherScatterOpCost(
3925-
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
3926-
/*VariableMask=*/false, alignment, CostKind, VL0);
3924+
if (TTI->isLegalMaskedGather(VecTy, alignment)) {
3925+
VecLdCost = TTI->getGatherScatterOpCost(
3926+
Instruction::Load, VecTy,
3927+
cast<LoadInst>(VL0)->getPointerOperand(),
3928+
/*VariableMask=*/false, alignment, CostKind, VL0);
3929+
} else {
3930+
// Lower just to a gather if masked gather is not legal. Also,
3931+
// compensate the cost of next entry for pointers.
3932+
VecLdCost =
3933+
getGatherCost(VL);
3934+
// Try to compensate the cost of the next entry for pointers iff all
3935+
// users are ScatterVectorize nodes.
3936+
const auto *It = find_if(
3937+
VectorizableTree, [E](const std::unique_ptr<TreeEntry> &TE) {
3938+
return !TE->UserTreeIndices.empty() &&
3939+
all_of(TE->UserTreeIndices,
3940+
[](const EdgeInfo &EI) {
3941+
return EI.UserTE->State ==
3942+
TreeEntry::ScatterVectorize;
3943+
}) &&
3944+
TE->UserTreeIndices.front().UserTE == E;
3945+
});
3946+
if (It != VectorizableTree.end())
3947+
VecLdCost -= getEntryCost(It->get());
3948+
}
39273949
}
39283950
if (!NeedToShuffleReuses && !E->ReorderIndices.empty()) {
39293951
SmallVector<int> NewMask;

0 commit comments

Comments
 (0)