-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Add ComputeAnyOfResult VPInstruction (NFC) #141932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesAdd a dedicated opcode for any-of reduction, similar to #132689 and #132690. The patch also explictly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe. Full diff: https://github.com/llvm/llvm-project/pull/141932.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 90e224ea8f37a..d777fc746b12b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7484,6 +7484,13 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
}
}
+static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) {
+ using namespace VPlanPatternMatch;
+ VPValue *StartVPV = RdxResult->getOperand(1);
+ match(StartVPV, m_Freeze(m_VPValue(StartVPV)));
+ return StartVPV->getLiveInIRValue();
+}
+
// If \p R is a ComputeReductionResult when vectorizing the epilog loop,
// fix the reduction's scalar PHI node by adding the incoming value from the
// main vector loop.
@@ -7492,7 +7499,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
BasicBlock *BypassBlock) {
auto *EpiRedResult = dyn_cast<VPInstruction>(R);
if (!EpiRedResult ||
- (EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
+ (EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult &&
+ EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult &&
EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult))
return;
@@ -7504,15 +7512,19 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
+ Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
+ (void)StartV;
auto *Cmp = cast<ICmpInst>(MainResumeValue);
assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
"AnyOf expected to start with ICMP_NE");
- assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() &&
+ assert(Cmp->getOperand(1) == StartV &&
"AnyOf expected to start by comparing main resume value to original "
"start value");
MainResumeValue = Cmp->getOperand(0);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
+ Value *StartV = getStartValueFromReductionResult(EpiRedResult);
+ (void)StartV;
using namespace llvm::PatternMatch;
Value *Cmp, *OrigResumeV, *CmpOp;
bool IsExpectedPattern =
@@ -7521,10 +7533,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
m_Value(OrigResumeV))) &&
(match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV),
m_Value(CmpOp))) &&
- (match(CmpOp,
- m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) ||
- (CmpOp == RdxDesc.getRecurrenceStartValue() &&
- isGuaranteedNotToBeUndefOrPoison(CmpOp))));
+ ((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp))));
assert(IsExpectedPattern && "Unexpected reduction resume pattern");
(void)IsExpectedPattern;
MainResumeValue = OrigResumeV;
@@ -9466,7 +9475,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) {
return isa<VPInstruction>(&U) &&
(cast<VPInstruction>(&U)->getOpcode() ==
+ VPInstruction::ComputeAnyOfResult ||
+ cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeReductionResult ||
+
cast<VPInstruction>(&U)->getOpcode() ==
VPInstruction::ComputeFindLastIVResult);
});
@@ -9519,6 +9531,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
+ } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ VPValue *Start = PhiR->getStartValue();
+ FinalReductionResult =
+ Builder.createNaryOp(VPInstruction::ComputeAnyOfResult,
+ {PhiR, Start, NewExitingVPV}, ExitDL);
} else {
VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind(
RdxDesc.getRecurrenceKind())
@@ -10050,23 +10068,36 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
+ auto *RdxResult =
+ cast<VPInstruction>(*find_if(ReductionPhi->users(), [](VPUser *U) {
+ auto *VPI = dyn_cast<VPInstruction>(U);
+ return VPI &&
+ (VPI->getOpcode() == VPInstruction::ComputeReductionResult ||
+ VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult);
+ }));
ResumeV = cast<PHINode>(ReductionPhi->getUnderlyingInstr())
->getIncomingValueForBlock(L->getLoopPreheader());
const RecurrenceDescriptor &RdxDesc =
ReductionPhi->getRecurrenceDescriptor();
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
+ Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue();
+ assert(RdxDesc.getRecurrenceStartValue() == StartV &&
+ "start value from ComputeAnyOfResult must match");
+
// VPReductionPHIRecipes for AnyOf reductions expect a boolean as
// start value; compare the final value from the main vector loop
// to the start value.
BasicBlock *PBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt());
- ResumeV =
- Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
+ ResumeV = Builder.CreateICmpNE(ResumeV, StartV);
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
- ToFrozen[RdxDesc.getRecurrenceStartValue()] =
- cast<PHINode>(ResumeV)->getIncomingValueForBlock(
- EPI.MainLoopIterationCountCheck);
+ Value *StartV = getStartValueFromReductionResult(RdxResult);
+ assert(RdxDesc.getRecurrenceStartValue() == StartV &&
+ "start value from ComputeFindLastIVResult must match");
+
+ ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
+ EPI.MainLoopIterationCountCheck);
// VPReductionPHIRecipe for FindLastIV reductions requires an adjustment
// to the resume value. The resume value is adjusted to the sentinel
@@ -10076,8 +10107,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
// variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
- Value *Cmp = Builder.CreateICmpEQ(
- ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]);
+ Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
ResumeV =
Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 13c0d84f5e219..af97477b9ae7f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -898,6 +898,7 @@ class VPInstruction : public VPRecipeWithIRFlags,
BranchOnCount,
BranchOnCond,
Broadcast,
+ ComputeAnyOfResult,
ComputeFindLastIVResult,
ComputeReductionResult,
// Extracts the last lane from its operand if it is a vector, or the last
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index ac0f30cb4693c..ecb6045e7a79f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -87,6 +87,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
inferScalarType(R->getOperand(1)) &&
"different types inferred for different operands");
return IntegerType::get(Ctx, 1);
+ case VPInstruction::ComputeAnyOfResult:
+ return inferScalarType(R->getOperand(1));
case VPInstruction::ComputeFindLastIVResult:
case VPInstruction::ComputeReductionResult: {
auto *PhiR = cast<VPReductionPHIRecipe>(R->getOperand(0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index f2a7f16e19a79..dfd9fc3d4d719 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -318,6 +318,12 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
{Op0, Op1, Op2});
}
+template <typename Op0_t>
+inline UnaryVPInstruction_match<Op0_t, Instruction::Freeze>
+m_Freeze(const Op0_t &Op0) {
+ return m_VPInstruction<Instruction::Freeze>(Op0);
+}
+
template <typename Op0_t>
inline UnaryVPInstruction_match<Op0_t, VPInstruction::Not>
m_Not(const Op0_t &Op0) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d16c8b44891f5..9d92ed1630a32 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
}
+ case VPInstruction::ComputeAnyOfResult: {
+ // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
+ // and will be removed by breaking up the recipe further.
+ auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
+ auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
+ Value *ReducedPartRdx = State.get(getOperand(2));
+ for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx)
+ ReducedPartRdx = Builder.CreateBinOp(
+ (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(
+ RecurKind::AnyOf),
+ State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx");
+ return createAnyOfReduction(Builder, ReducedPartRdx,
+ State.get(getOperand(1), VPLane(0)), OrigPhi);
+ }
case VPInstruction::ComputeFindLastIVResult: {
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
// and will be removed by breaking up the recipe further.
@@ -681,17 +695,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
// Create the reduction after the loop. Note that inloop reductions create
// the target reduction in the loop using a Reduction recipe.
- if ((State.VF.isVector() ||
- RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) &&
- !PhiR->isInLoop()) {
+ if (State.VF.isVector() && !PhiR->isInLoop()) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
- if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
- ReducedPartRdx =
- createAnyOfReduction(Builder, ReducedPartRdx,
- RdxDesc.getRecurrenceStartValue(), OrigPhi);
- else
ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
// If the reduction can be performed in a smaller type, we need to extend
@@ -841,6 +848,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::ExtractPenultimateElement ||
getOpcode() == Instruction::ExtractElement ||
getOpcode() == VPInstruction::FirstActiveLane ||
+ getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindLastIVResult ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
@@ -938,6 +946,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
return true;
case VPInstruction::PtrAdd:
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
+ case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ComputeFindLastIVResult:
return Op == getOperand(1);
};
@@ -1021,6 +1030,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ExtractPenultimateElement:
O << "extract-penultimate-element";
break;
+ case VPInstruction::ComputeAnyOfResult:
+ O << "compute-anyof-result";
+ break;
case VPInstruction::ComputeFindLastIVResult:
O << "compute-find-last-iv-result";
break;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index e1fb3d476c58d..335301a927ceb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -327,7 +327,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) {
// Add all VPValues for all parts to ComputeReductionResult which combines
// the parts to compute the final reduction value.
VPValue *Op1;
- if (match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
+ if (match(&R, m_VPInstruction<VPInstruction::ComputeAnyOfResult>(
+ m_VPValue(), m_VPValue(), m_VPValue(Op1))) ||
+ match(&R, m_VPInstruction<VPInstruction::ComputeReductionResult>(
m_VPValue(), m_VPValue(Op1))) ||
match(&R, m_VPInstruction<VPInstruction::ComputeFindLastIVResult>(
m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {
|
You can test this locally with the following command:git-clang-format --diff HEAD~1 HEAD --extensions cpp,h -- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.h llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp View the diff from clang-format here.diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9d92ed163..af323ed78 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -699,7 +699,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
- ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
+ ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
|
Add a dedicated opcode for any-of reduction, similar to #132689 and #132690.
The patch also explictly adds the start value to not require RecurrenceDescriptor during execute. It also allows freezing the start value to make it poison-safe.