-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[VPlan] Construct initial once and pass clones to tryToBuildVPlan (NFC). #141363
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Update to only build an initial, plain-CFG VPlan once, and then transform & optimize clones. This requires changes to ::clone() for VPInstruction and VPWidenPHIRecipe to allow for proper cloning of the recipes in the initial VPlan.
@llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesUpdate to only build an initial, plain-CFG VPlan once, and then transform & optimize clones. This requires changes to ::clone() for VPInstruction and VPWidenPHIRecipe to allow for proper cloning of the recipes in the initial VPlan. Full diff: https://github.com/llvm/llvm-project/pull/141363.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index bae53c600c18c..c772c74113d97 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -524,7 +524,8 @@ class LoopVectorizationPlanner {
/// returned VPlan is valid for. If no VPlan can be built for the input range,
/// set the largest included VF to the maximum VF for which no plan could be
/// built.
- VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
+ VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
+ LoopVersioning *LVer);
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8636550d4f644..1232538e68dd0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8715,11 +8715,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
// overlap across all iterations.
LVer.prepareNoAliasMetadata();
}
+ auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
- if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
+ if (auto Plan = tryToBuildVPlanWithVPRecipes(
+ std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
bool HasScalarVF = Plan->hasScalarVFOnly();
// Now optimize the initial VPlan.
if (!HasScalarVF)
@@ -8980,9 +8982,8 @@ static void addExitUsersForFirstOrderRecurrences(
}
}
-VPlanPtr
-LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
- LoopVersioning *LVer) {
+VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
+ VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
using namespace llvm::VPlanPatternMatch;
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9004,7 +9005,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c4e66cd89e69c..940df596e5cec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1004,6 +1004,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
VPInstruction *clone() override {
SmallVector<VPValue *, 2> Operands(operands());
auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
+ if (getUnderlyingValue())
+ New->setUnderlyingValue(getUnderlyingInstr());
New->transferFlags(*this);
return New;
}
@@ -2129,7 +2131,11 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
}
VPWidenPHIRecipe *clone() override {
- llvm_unreachable("cloning not implemented yet");
+ auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()),
+ getOperand(0), getDebugLoc(), Name);
+ for (VPValue *Op : make_range(std::next(op_begin()), op_end()))
+ C->addOperand(Op);
+ return C;
}
~VPWidenPHIRecipe() override = default;
|
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesUpdate to only build an initial, plain-CFG VPlan once, and then transform & optimize clones. This requires changes to ::clone() for VPInstruction and VPWidenPHIRecipe to allow for proper cloning of the recipes in the initial VPlan. Full diff: https://github.com/llvm/llvm-project/pull/141363.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index bae53c600c18c..c772c74113d97 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -524,7 +524,8 @@ class LoopVectorizationPlanner {
/// returned VPlan is valid for. If no VPlan can be built for the input range,
/// set the largest included VF to the maximum VF for which no plan could be
/// built.
- VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
+ VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
+ LoopVersioning *LVer);
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8636550d4f644..1232538e68dd0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8715,11 +8715,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
// overlap across all iterations.
LVer.prepareNoAliasMetadata();
}
+ auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
auto MaxVFTimes2 = MaxVF * 2;
for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
VFRange SubRange = {VF, MaxVFTimes2};
- if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
+ if (auto Plan = tryToBuildVPlanWithVPRecipes(
+ std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
bool HasScalarVF = Plan->hasScalarVFOnly();
// Now optimize the initial VPlan.
if (!HasScalarVF)
@@ -8980,9 +8982,8 @@ static void addExitUsersForFirstOrderRecurrences(
}
}
-VPlanPtr
-LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
- LoopVersioning *LVer) {
+VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
+ VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
using namespace llvm::VPlanPatternMatch;
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9004,7 +9005,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
return !CM.requiresScalarEpilogue(VF.isVector());
},
Range);
- auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
VPlanTransforms::prepareForVectorization(
*Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
CM.foldTailByMasking(), OrigLoop,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c4e66cd89e69c..940df596e5cec 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1004,6 +1004,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
VPInstruction *clone() override {
SmallVector<VPValue *, 2> Operands(operands());
auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
+ if (getUnderlyingValue())
+ New->setUnderlyingValue(getUnderlyingInstr());
New->transferFlags(*this);
return New;
}
@@ -2129,7 +2131,11 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
}
VPWidenPHIRecipe *clone() override {
- llvm_unreachable("cloning not implemented yet");
+ auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()),
+ getOperand(0), getDebugLoc(), Name);
+ for (VPValue *Op : make_range(std::next(op_begin()), op_end()))
+ C->addOperand(Op);
+ return C;
}
~VPWidenPHIRecipe() override = default;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice hoisting of buildPlainCFG() out of tryToBuildVPlanWithVPRecipes(), ship it!
@@ -524,7 +524,8 @@ class LoopVectorizationPlanner { | |||
/// returned VPlan is valid for. If no VPlan can be built for the input range, | |||
/// set the largest included VF to the maximum VF for which no plan could be | |||
/// built. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth updating the documentation, say by adding something like:
/// built. | |
/// built. Each VPlan is built starting from a copy of \p InitialPlan, which is a plain CFG VPlan wrapping the original scalar loop. |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added thanks
@@ -8715,11 +8715,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, | |||
// overlap across all iterations. | |||
LVer.prepareNoAliasMetadata(); | |||
} | |||
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); | |
auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
moved down, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Next follow-up would be checking what is needed to pull out also early region creation
…ldVPlan (NFC). (#141363) Update to only build an initial, plain-CFG VPlan once, and then transform & optimize clones. This requires changes to ::clone() for VPInstruction and VPWidenPHIRecipe to allow for proper cloning of the recipes in the initial VPlan. PR: llvm/llvm-project#141363
FYI this has some overhead, though not a lot: https://llvm-compile-time-tracker.com/compare.php?from=435d8b12efc1447b3c9f95ace2c57dae22ea486d&to=567b3172da2d52f5df70a37f3de06b7000b25968&stat=instructions:u I assume that's to be expected given what the change does. |
…Plan (NFC). (llvm#141363)" This reverts commit 567b317.
Thanks for the heads-up, it is a bit surprising. There was at least one case where we now are doing unnecessary work which should be fixed by ad58ea3. This recovers some of the impact, but not yet all of it: http://llvm-compile-time-tracker.com/compare.php?from=3033f202f6707937cd28c2473479db134993f96f&to=1a0b9e5834f7fd4abf058864e656f8e26b7a26ff&stat=instructions:u Will check if there's anything else to improve. |
Update to only build an initial, plain-CFG VPlan once, and then transform & optimize clones.
This requires changes to ::clone() for VPInstruction and VPWidenPHIRecipe to allow for proper cloning of the recipes in the initial VPlan.