diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index c751f053cb65a..b81d582f07e88 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -519,8 +519,10 @@ class LoopVectorizationPlanner {
   /// \p Range's largest included VF is restricted to the maximum VF the
   /// returned VPlan is valid for. If no VPlan can be built for the input range,
   /// set the largest included VF to the maximum VF for which no plan could be
-  /// built.
-  VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range, LoopVersioning *LVer);
+  /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
+  /// is a plain CFG VPlan wrapping the original scalar loop.
+  VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
+                                        LoopVersioning *LVer);
 
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8636550d4f644..05b5764ffcafc 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8717,9 +8717,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
   }
 
   auto MaxVFTimes2 = MaxVF * 2;
+  auto VPlan0 = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
-    if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, &LVer)) {
+    if (auto Plan = tryToBuildVPlanWithVPRecipes(
+            std::unique_ptr<VPlan>(VPlan0->duplicate()), SubRange, &LVer)) {
       bool HasScalarVF = Plan->hasScalarVFOnly();
       // Now optimize the initial VPlan.
       if (!HasScalarVF)
@@ -8980,9 +8982,8 @@ static void addExitUsersForFirstOrderRecurrences(
   }
 }
 
-VPlanPtr
-LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
-                                                       LoopVersioning *LVer) {
+VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
+    VPlanPtr Plan, VFRange &Range, LoopVersioning *LVer) {
 
   using namespace llvm::VPlanPatternMatch;
   SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -9004,7 +9005,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range,
             return !CM.requiresScalarEpilogue(VF.isVector());
           },
           Range);
-  auto Plan = VPlanTransforms::buildPlainCFG(OrigLoop, *LI);
   VPlanTransforms::prepareForVectorization(
       *Plan, Legal->getWidestInductionType(), PSE, RequiresScalarEpilogueCheck,
       CM.foldTailByMasking(), OrigLoop,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 28fad085b4b7b..16c461cd60919 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -972,7 +972,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
 
   VPInstruction *clone() override {
     SmallVector<VPValue *, 2> Operands(operands());
-    return new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name);
+    auto *New = new VPInstruction(Opcode, Operands, *this, getDebugLoc(), Name);
+    if (getUnderlyingValue())
+      New->setUnderlyingValue(getUnderlyingInstr());
+    return New;
   }
 
   unsigned getOpcode() const { return Opcode; }
@@ -2090,7 +2093,11 @@ class VPWidenPHIRecipe : public VPSingleDefRecipe, public VPPhiAccessors {
   }
 
   VPWidenPHIRecipe *clone() override {
-    llvm_unreachable("cloning not implemented yet");
+    auto *C = new VPWidenPHIRecipe(cast<PHINode>(getUnderlyingValue()),
+                                   getOperand(0), getDebugLoc(), Name);
+    for (VPValue *Op : make_range(std::next(op_begin()), op_end()))
+      C->addOperand(Op);
+    return C;
   }
 
   ~VPWidenPHIRecipe() override = default;