From 8be25aac369567a7b0bf33934d6d8cbb073ce688 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 5 Apr 2025 15:41:08 +0100 Subject: [PATCH 1/2] [VPlan] Construct initial VPlan once and pass clones to tryToBuildVPlan (NFC). Update to only build an initial, plain-CFG VPlan once, and then transform & optimize clones. This requires changes to ::clone() for VPInstruction and VPWidenPHIRecipe to allow for proper cloning of the recipes in the initial VPlan. --- .../Transforms/Vectorize/LoopVectorizationPlanner.h | 3 ++- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++----- llvm/lib/Transforms/Vectorize/VPlan.h | 8 +++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index bae53c600c18c..c772c74113d97 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -524,7 +524,8 @@ class LoopVectorizationPlanner { /// returned VPlan is valid for. If no VPlan can be built for the input range, /// set the largest included VF to the maximum VF for which no plan could be /// built. - VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer); + VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range, + LoopVersioning *LVer); /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive, /// according to the information gathered by Legal when it checked if it is diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8636550d4f644..1232538e68dd0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8715,11 +8715,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, // overlap across all iterations. 
LVer.prepareNoAliasMetadata(); } + auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); auto MaxVFTimes2 = MaxVF * 2; for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; - if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) { + if (auto Plan = tryToBuildVPlanWithVPRecipes( + std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) { bool HasScalarVF = Plan->hasScalarVFOnly(); // Now optimize the initial VPlan. if (!HasScalarVF) @@ -8980,9 +8982,8 @@ static void addExitUsersForFirstOrderRecurrences( } } -VPlanPtr -LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, - LoopVersioning *LVer) { +VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( + VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) { using namespace llvm::VPlanPatternMatch; SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups; @@ -9004,7 +9005,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range, return !CM.requiresScalarEpilogue(VF.isVector()); }, Range); - auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); VPlanTransforms::prepareForVectorization( *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck, CM.foldTailByMasking(), OrigLoop, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c4e66cd89e69c..940df596e5cec 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1004,6 +1004,8 @@ class VPInstruction : public VPRecipeWithIRFlags, VPInstruction *clone() override { SmallVector<VPValue *, 2> Operands(operands()); auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name); + if (getUnderlyingValue()) + New->setUnderlyingValue(getUnderlyingInstr()); New->transferFlags(*this); return New; } @@ -2129,7 +2131,11 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors { } VPWidenPHIRecipe *clone() override { - llvm_unreachable("cloning not implemented yet"); + 
auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()), getOperand(0), getDebugLoc(), Name); + for (VPValue *Op : make_range(std::next(op_begin()), op_end())) + C->addOperand(Op); + return C; } ~VPWidenPHIRecipe() override = default; From 8253640881bff8a785d3cfede9899296aa7e8fc3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 26 May 2025 12:46:27 +0100 Subject: [PATCH 2/2] !fixup address latest comments, thanks --- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h | 3 ++- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 352f7399e2275..b81d582f07e88 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -519,7 +519,8 @@ class LoopVectorizationPlanner { /// \p Range's largest included VF is restricted to the maximum VF the /// returned VPlan is valid for. If no VPlan can be built for the input range, /// set the largest included VF to the maximum VF for which no plan could be - /// built. + /// built. Each VPlan is built starting from a copy of \p InitialPlan, which + /// is a plain CFG VPlan wrapping the original scalar loop. VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range, LoopVersioning *LVer); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1232538e68dd0..05b5764ffcafc 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8715,9 +8715,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, // overlap across all iterations. 
LVer.prepareNoAliasMetadata(); } - auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); auto MaxVFTimes2 = MaxVF * 2; + auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI); for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) { VFRange SubRange = {VF, MaxVFTimes2}; if (auto Plan = tryToBuildVPlanWithVPRecipes(