Skip to content

Commit 1d43cdc

Browse files
[LV][EVL] Support reversed loads/stores.
Support for the predicated vector reverse intrinsic was added some time ago. This patch adds support for predicated reversed loads/stores in the loop vectorizer. Reviewers: fhahn Reviewed By: fhahn Pull Request: #88025
1 parent fc7e74e commit 1d43cdc

File tree

4 files changed

+213
-56
lines changed

4 files changed

+213
-56
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,13 +1571,7 @@ class LoopVectorizationCostModel {
15711571
/// Returns true if VP intrinsics with explicit vector length support should
15721572
/// be generated in the tail folded loop.
15731573
bool foldTailWithEVL() const {
1574-
return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL &&
1575-
// FIXME: remove this once vp_reverse is supported.
1576-
none_of(
1577-
WideningDecisions,
1578-
[](const std::pair<std::pair<Instruction *, ElementCount>,
1579-
std::pair<InstWidening, InstructionCost>>
1580-
&Data) { return Data.second.first == CM_Widen_Reverse; });
1574+
return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL;
15811575
}
15821576

15831577
/// Returns true if the Phi is part of an inloop reduction.
@@ -9388,12 +9382,18 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
93889382
}
93899383
}
93909384

9385+
/// Emit a call to the experimental_vp_reverse intrinsic on \p Operand.
///
/// \param Builder IR builder used to create the mask splat and the call.
/// \param Operand value to reverse; its type is cast to VectorType, so it
///        must be a vector.
/// \param EVL     value passed as the third operand of the intrinsic call.
/// \param Name    name given to the created intrinsic call.
/// \returns the created intrinsic call, whose result type is the vector type
///          of \p Operand.
static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
9386+
Value *EVL, const Twine &Name) {
9387+
VectorType *ValTy = cast<VectorType>(Operand->getType());
9388+
// The mask operand is always a splat of `true` sized to Operand's element
// count; callers never supply their own mask for the reverse itself.
Value *AllTrueMask =
9389+
Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
9390+
return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
9391+
{Operand, AllTrueMask, EVL}, nullptr, Name);
9392+
}
9393+
93919394
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
93929395
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
93939396
"explicit vector length.");
9394-
// FIXME: Support reverse loading after vp_reverse is added.
9395-
assert(!isReverse() && "Reverse loads are not implemented yet.");
9396-
93979397
auto *LI = cast<LoadInst>(&Ingredient);
93989398

93999399
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
@@ -9406,9 +9406,15 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
94069406
CallInst *NewLI;
94079407
Value *EVL = State.get(getEVL(), VPIteration(0, 0));
94089408
Value *Addr = State.get(getAddr(), 0, !CreateGather);
9409-
Value *Mask = getMask()
9410-
? State.get(getMask(), 0)
9411-
: Builder.CreateVectorSplat(State.VF, Builder.getTrue());
9409+
Value *Mask = nullptr;
9410+
if (VPValue *VPMask = getMask()) {
9411+
Mask = State.get(VPMask, 0);
9412+
if (isReverse())
9413+
Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
9414+
} else {
9415+
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
9416+
}
9417+
94129418
if (CreateGather) {
94139419
NewLI =
94149420
Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL},
@@ -9422,7 +9428,13 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
94229428
NewLI->addParamAttr(
94239429
0, Attribute::getWithAlignment(NewLI->getContext(), Alignment));
94249430
State.addMetadata(NewLI, LI);
9425-
State.set(this, NewLI, 0);
9431+
Instruction *Res = NewLI;
9432+
if (isReverse()) {
9433+
// Use cheap all-true mask for reverse rather than actual mask, it does not
9434+
// affect the result.
9435+
Res = createReverseEVL(Builder, Res, EVL, "vp.reverse");
9436+
}
9437+
State.set(this, Res, 0);
94269438
}
94279439

94289440
void VPWidenStoreRecipe::execute(VPTransformState &State) {
@@ -9468,9 +9480,6 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
94689480
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
94699481
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
94709482
"explicit vector length.");
9471-
// FIXME: Support reverse loading after vp_reverse is added.
9472-
assert(!isReverse() && "Reverse store are not implemented yet.");
9473-
94749483
auto *SI = cast<StoreInst>(&Ingredient);
94759484

94769485
VPValue *StoredValue = getStoredValue();
@@ -9483,10 +9492,19 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
94839492
CallInst *NewSI = nullptr;
94849493
Value *StoredVal = State.get(StoredValue, 0);
94859494
Value *EVL = State.get(getEVL(), VPIteration(0, 0));
9486-
// FIXME: Support reverse store after vp_reverse is added.
9487-
Value *Mask = getMask()
9488-
? State.get(getMask(), 0)
9489-
: Builder.CreateVectorSplat(State.VF, Builder.getTrue());
9495+
if (isReverse()) {
9496+
// Use cheap all-true mask for reverse rather than actual mask, it does not
9497+
// affect the result.
9498+
StoredVal = createReverseEVL(Builder, StoredVal, EVL, "vp.reverse");
9499+
}
9500+
Value *Mask = nullptr;
9501+
if (VPValue *VPMask = getMask()) {
9502+
Mask = State.get(VPMask, 0);
9503+
if (isReverse())
9504+
Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
9505+
} else {
9506+
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
9507+
}
94909508
Value *Addr = State.get(getAddr(), 0, !CreateScatter);
94919509
if (CreateScatter) {
94929510
NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2413,8 +2413,8 @@ struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
24132413
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
24142414
VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
24152415
: VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
2416-
{L->getAddr(), EVL}, L->isConsecutive(), false,
2417-
L->getDebugLoc()),
2416+
{L->getAddr(), EVL}, L->isConsecutive(),
2417+
L->isReverse(), L->getDebugLoc()),
24182418
VPValue(this, &getIngredient()) {
24192419
setMask(Mask);
24202420
}
@@ -2490,7 +2490,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
24902490
VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
24912491
: VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
24922492
{S->getAddr(), S->getStoredValue(), EVL},
2493-
S->isConsecutive(), false, S->getDebugLoc()) {
2493+
S->isConsecutive(), S->isReverse(),
2494+
S->getDebugLoc()) {
24942495
setMask(Mask);
24952496
}
24962497

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,8 +1341,6 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
13411341
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
13421342
if (!MemR)
13431343
continue;
1344-
assert(!MemR->isReverse() &&
1345-
"Reversed memory operations not supported yet.");
13461344
VPValue *OrigMask = MemR->getMask();
13471345
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
13481346
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;

0 commit comments

Comments
 (0)