Skip to content

Commit bf92b2d

Browse files
committed
Partially support Extended-reduction.
1 parent ea25db2 commit bf92b2d

File tree

6 files changed

+359
-25
lines changed

6 files changed

+359
-25
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7662,6 +7662,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76627662
ILV.getOrCreateVectorTripCount(nullptr),
76637663
CanonicalIVStartValue, State);
76647664

7665+
// TODO: Rebase to fhahn's implementation.
7666+
VPlanTransforms::prepareExecute(BestVPlan);
7667+
dbgs() << "\n\n print plan\n";
7668+
BestVPlan.print(dbgs());
76657669
BestVPlan.execute(&State);
76667670

76677671
// 2.5 Collect reduction resume values.
@@ -9256,6 +9260,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92569260
// Adjust AnyOf reductions; replace the reduction phi for the selected value
92579261
// with a boolean reduction phi node to check if the condition is true in any
92589262
// iteration. The final value is selected by the final ComputeReductionResult.
9263+
// TODO: Implement VPMulAcc here.
92599264
void LoopVectorizationPlanner::adjustRecipesForReductions(
92609265
VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) {
92619266
using namespace VPlanPatternMatch;
@@ -9374,9 +9379,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93749379
if (CM.blockNeedsPredicationForAnyReason(BB))
93759380
CondOp = RecipeBuilder.getBlockInMask(BB);
93769381

9377-
VPReductionRecipe *RedRecipe =
9378-
new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
9379-
CondOp, CM.useOrderedReductions(RdxDesc));
9382+
// A zext/sext VPWidenCastRecipe without multiple unique users can be folded
// into the reduction as a VPExtendedReductionRecipe.
9383+
VPValue *A;
9384+
VPSingleDefRecipe *RedRecipe;
9385+
if (match(VecOp, m_ZExtOrSExt(m_VPValue(A))) &&
9386+
!VecOp->hasMoreThanOneUniqueUser()) {
9387+
RedRecipe = new VPExtendedReductionRecipe(
9388+
RdxDesc, CurrentLinkI,
9389+
cast<CastInst>(
9390+
cast<VPWidenCastRecipe>(VecOp)->getUnderlyingInstr()),
9391+
PreviousLink, A, CondOp, CM.useOrderedReductions(RdxDesc),
9392+
cast<VPWidenCastRecipe>(VecOp)->getResultType());
9393+
} else {
9394+
RedRecipe =
9395+
new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
9396+
CondOp, CM.useOrderedReductions(RdxDesc));
9397+
}
93809398
// Append the recipe to the end of the VPBasicBlock because we need to
93819399
// ensure that it comes after all of it's inputs, including CondOp.
93829400
// Note that this transformation may leave over dead recipes (including

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
859859
case VPRecipeBase::VPInstructionSC:
860860
case VPRecipeBase::VPReductionEVLSC:
861861
case VPRecipeBase::VPReductionSC:
862+
case VPRecipeBase::VPMulAccSC:
863+
case VPRecipeBase::VPExtendedReductionSC:
862864
case VPRecipeBase::VPReplicateSC:
863865
case VPRecipeBase::VPScalarIVStepsSC:
864866
case VPRecipeBase::VPVectorPointerSC:
@@ -2655,6 +2657,221 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
26552657
}
26562658
};
26572659

2660+
/// A recipe to represent in-loop extended reduction operations: extending a
2661+
/// vector operand, reducing it into a scalar value, and adding the result to
2662+
/// a chain. This is a high-level abstract recipe that is expected to be
2663+
/// lowered to a VPWidenCastRecipe and a VPReductionRecipe before execution
2664+
/// (execute() is unreachable). The operands are {ChainOp, VecOp, [Condition]}.
2665+
class VPExtendedReductionRecipe : public VPSingleDefRecipe {
2666+
/// The recurrence descriptor for the reduction in question.
2667+
const RecurrenceDescriptor &RdxDesc;
2668+
/// Whether the reduction is ordered; returned by isOrdered().
bool IsOrdered;
2669+
/// Whether the reduction is conditional.
2670+
bool IsConditional = false;
2671+
/// Type after extend.
2672+
Type *ResultTy;
2673+
/// Opcode of the extend folded into this recipe; returned by getExtOpcode().
Instruction::CastOps ExtOp;
2674+
/// The cast instruction supplied at construction; returned by getExtInstr().
CastInst *CastInstr;
2675+
/// True if ExtOp is ZExt; computed in the protected constructor.
bool IsZExt;
2676+
2677+
protected:
2678+
/// Common constructor the public constructors delegate to. Forwards \p SC,
/// \p Operands and \p RedI to VPSingleDefRecipe and, when \p CondOp is
/// non-null, marks the recipe conditional and appends \p CondOp as the last
/// operand.
VPExtendedReductionRecipe(const unsigned char SC,
2679+
const RecurrenceDescriptor &R, Instruction *RedI,
2680+
Instruction::CastOps ExtOp, CastInst *CastI,
2681+
ArrayRef<VPValue *> Operands, VPValue *CondOp,
2682+
bool IsOrdered, Type *ResultTy)
2683+
: VPSingleDefRecipe(SC, Operands, RedI), RdxDesc(R), IsOrdered(IsOrdered),
2684+
ResultTy(ResultTy), ExtOp(ExtOp), CastInstr(CastI) {
2685+
if (CondOp) {
2686+
IsConditional = true;
2687+
addOperand(CondOp);
2688+
}
2689+
IsZExt = ExtOp == Instruction::CastOps::ZExt;
2690+
}
2691+
2692+
public:
2693+
/// Create the recipe from its individual parts. The extend opcode is taken
/// from \p CastI, and the operands are {\p ChainOp, \p VecOp} followed by
/// \p CondOp if it is non-null.
VPExtendedReductionRecipe(const RecurrenceDescriptor &R, Instruction *RedI,
2694+
CastInst *CastI, VPValue *ChainOp, VPValue *VecOp,
2695+
VPValue *CondOp, bool IsOrdered, Type *ResultTy)
2696+
: VPExtendedReductionRecipe(VPDef::VPExtendedReductionSC, R, RedI,
2697+
CastI->getOpcode(), CastI,
2698+
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2699+
IsOrdered, ResultTy) {}
2700+
2701+
/// Create the recipe from an existing reduction \p Red and extend \p Ext.
/// The descriptor, underlying instruction, chain, condition and ordering come
/// from \p Red; the vector operand is operand 0 of \p Ext, and the opcode and
/// result type come from \p Ext. The underlying instruction of \p Ext is
/// cast<> to CastInst.
VPExtendedReductionRecipe(VPReductionRecipe *Red, VPWidenCastRecipe *Ext)
2702+
: VPExtendedReductionRecipe(
2703+
VPDef::VPExtendedReductionSC, Red->getRecurrenceDescriptor(),
2704+
Red->getUnderlyingInstr(), Ext->getOpcode(),
2705+
cast<CastInst>(Ext->getUnderlyingInstr()),
2706+
ArrayRef<VPValue *>({Red->getChainOp(), Ext->getOperand(0)}),
2707+
Red->getCondOp(), Red->isOrdered(), Ext->getResultType()) {}
2708+
2709+
~VPExtendedReductionRecipe() override = default;
2710+
2711+
/// Cloning is not implemented yet; calling this hits llvm_unreachable.
VPExtendedReductionRecipe *clone() override {
2712+
llvm_unreachable("Not implement yet");
2713+
}
2714+
2715+
/// Support for isa/cast/dyn_cast: matches recipes whose VPDef ID is
/// VPExtendedReductionSC.
static inline bool classof(const VPRecipeBase *R) {
2716+
return R->getVPDefID() == VPDef::VPExtendedReductionSC;
2717+
}
2718+
2719+
/// As above, for a VPUser that is a VPRecipeBase.
static inline bool classof(const VPUser *U) {
2720+
auto *R = dyn_cast<VPRecipeBase>(U);
2721+
return R && classof(R);
2722+
}
2723+
2724+
/// This recipe cannot be executed directly: it must have been replaced by
/// concrete recipes beforehand, otherwise this hits llvm_unreachable.
/// NOTE(review): the message names "VPExtendedRecipe" while the class comment
/// names VPWidenCastRecipe — confirm which is intended.
2725+
void execute(VPTransformState &State) override {
2726+
llvm_unreachable("VPExtendedReductionRecipe should be transform to "
2727+
"VPExtendedRecipe + VPReductionRecipe before execution.");
2728+
};
2729+
2730+
/// Return the cost of VPExtendedReductionRecipe.
2731+
InstructionCost computeCost(ElementCount VF,
2732+
VPCostContext &Ctx) const override;
2733+
2734+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2735+
/// Print the recipe.
2736+
void print(raw_ostream &O, const Twine &Indent,
2737+
VPSlotTracker &SlotTracker) const override;
2738+
#endif
2739+
2740+
/// Return the recurrence descriptor for the in-loop reduction.
2741+
const RecurrenceDescriptor &getRecurrenceDescriptor() const {
2742+
return RdxDesc;
2743+
}
2744+
/// Return true if the in-loop reduction is ordered.
2745+
bool isOrdered() const { return IsOrdered; };
2746+
/// Return true if the in-loop reduction is conditional.
2747+
bool isConditional() const { return IsConditional; };
2748+
/// The VPValue of the scalar Chain being accumulated.
2749+
VPValue *getChainOp() const { return getOperand(0); }
2750+
/// The VPValue of the vector value to be extended and reduced.
2751+
VPValue *getVecOp() const { return getOperand(1); }
2752+
/// The VPValue of the condition for the block: the last operand if the
/// recipe is conditional, nullptr otherwise.
2753+
VPValue *getCondOp() const {
2754+
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2755+
}
2756+
/// Return the type after the extend.
Type *getResultType() const { return ResultTy; };
2757+
/// Return the opcode of the extend.
Instruction::CastOps getExtOpcode() const { return ExtOp; };
2758+
/// Return the cast instruction supplied at construction.
CastInst *getExtInstr() const { return CastInstr; };
2759+
};
2760+
2761+
/// A recipe to represent in-loop multiply-accumulate reduction operations,
2762+
/// performing a reduction on a vector operand into a scalar value, and adding
2763+
/// the result to a chain. This is a high-level abstract recipe that is
2764+
/// intended to generate VPReductionRecipe, VPWidenRecipe (mul) and
2765+
/// VPWidenCastRecipe before execution.
/// The operands are {ChainOp, VecOp1, VecOp2, [Condition]}.
2766+
class VPMulAccRecipe : public VPSingleDefRecipe {
2767+
/// The recurrence descriptor for the reduction in question.
2768+
const RecurrenceDescriptor &RdxDesc;
2769+
/// Whether the reduction is ordered; returned by isOrdered().
bool IsOrdered;
2770+
/// Whether the reduction is conditional.
2771+
bool IsConditional = false;
2772+
/// Type after extend.
2773+
Type *ResultTy;
2774+
/// Type for mul.
2775+
Type *MulTy;
2776+
/// Extend opcodes for the pattern
/// reduce.add(OuterExt(mul(InnerExt(), InnerExt())))
2777+
Instruction::CastOps OuterExtOp;
2778+
Instruction::CastOps InnerExtOp;
2779+
2780+
/// The instructions supplied at construction for the mul, the outer extend
/// and the two inner extends.
Instruction *MulI;
2781+
Instruction *OuterExtI;
2782+
Instruction *InnerExt0I;
2783+
Instruction *InnerExt1I;
2784+
2785+
protected:
2786+
/// Common constructor the public constructors delegate to. Forwards \p SC,
/// \p Operands and \p RedI to VPSingleDefRecipe and, when \p CondOp is
/// non-null, marks the recipe conditional and appends \p CondOp as the last
/// operand.
VPMulAccRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2787+
Instruction *RedI, Instruction::CastOps OuterExtOp,
2788+
Instruction *OuterExtI, Instruction *MulI,
2789+
Instruction::CastOps InnerExtOp, Instruction *InnerExt0I,
2790+
Instruction *InnerExt1I, ArrayRef<VPValue *> Operands,
2791+
VPValue *CondOp, bool IsOrdered, Type *ResultTy, Type *MulTy)
2792+
: VPSingleDefRecipe(SC, Operands, RedI), RdxDesc(R), IsOrdered(IsOrdered),
2793+
ResultTy(ResultTy), MulTy(MulTy), OuterExtOp(OuterExtOp),
2794+
InnerExtOp(InnerExtOp), MulI(MulI), OuterExtI(OuterExtI),
2795+
InnerExt0I(InnerExt0I), InnerExt1I(InnerExt1I) {
2796+
if (CondOp) {
2797+
IsConditional = true;
2798+
addOperand(CondOp);
2799+
}
2800+
}
2801+
2802+
public:
2803+
/// Create the recipe from its individual parts. \p OuterExt and \p InnerExt0
/// are cast<> to CastInst to obtain the outer and inner extend opcodes. The
/// operands are {\p ChainOp, \p InnerExt0Op, \p InnerExt1Op} followed by
/// \p CondOp if it is non-null.
/// NOTE(review): only InnerExt0's opcode is recorded; presumably both inner
/// extends are expected to use the same opcode — confirm.
VPMulAccRecipe(const RecurrenceDescriptor &R, Instruction *RedI,
2804+
Instruction *OuterExt, Instruction *Mul,
2805+
Instruction *InnerExt0, Instruction *InnerExt1,
2806+
VPValue *ChainOp, VPValue *InnerExt0Op, VPValue *InnerExt1Op,
2807+
VPValue *CondOp, bool IsOrdered, Type *ResultTy, Type *MulTy)
2808+
: VPMulAccRecipe(
2809+
VPDef::VPMulAccSC, R, RedI, cast<CastInst>(OuterExt)->getOpcode(),
2810+
OuterExt, Mul, cast<CastInst>(InnerExt0)->getOpcode(), InnerExt0,
2811+
InnerExt1, ArrayRef<VPValue *>({ChainOp, InnerExt0Op, InnerExt1Op}),
2812+
CondOp, IsOrdered, ResultTy, MulTy) {}
2813+
2814+
/// Create the recipe from existing recipes. The descriptor, underlying
/// instruction, chain, condition and ordering come from \p Red; the vector
/// operands are operand 0 of \p InnerExt0 and of \p InnerExt1; the result
/// type is \p OuterExt's result type and the mul type is \p InnerExt0's
/// result type.
VPMulAccRecipe(VPReductionRecipe *Red, VPWidenCastRecipe *OuterExt,
2815+
VPWidenRecipe *Mul, VPWidenCastRecipe *InnerExt0,
2816+
VPWidenCastRecipe *InnerExt1)
2817+
: VPMulAccRecipe(
2818+
VPDef::VPMulAccSC, Red->getRecurrenceDescriptor(),
2819+
Red->getUnderlyingInstr(), OuterExt->getOpcode(),
2820+
OuterExt->getUnderlyingInstr(), Mul->getUnderlyingInstr(),
2821+
InnerExt0->getOpcode(), InnerExt0->getUnderlyingInstr(),
2822+
InnerExt1->getUnderlyingInstr(),
2823+
ArrayRef<VPValue *>({Red->getChainOp(), InnerExt0->getOperand(0),
2824+
InnerExt1->getOperand(0)}),
2825+
Red->getCondOp(), Red->isOrdered(), OuterExt->getResultType(),
2826+
InnerExt0->getResultType()) {}
2827+
2828+
~VPMulAccRecipe() override = default;
2829+
2830+
/// Cloning is not implemented yet; calling this hits llvm_unreachable.
VPMulAccRecipe *clone() override { llvm_unreachable("Not implement yet"); }
2831+
2832+
/// Support for isa/cast/dyn_cast: matches recipes whose VPDef ID is
/// VPMulAccSC.
static inline bool classof(const VPRecipeBase *R) {
2833+
return R->getVPDefID() == VPRecipeBase::VPMulAccSC;
2834+
}
2835+
2836+
/// As above, for a VPUser that is a VPRecipeBase.
static inline bool classof(const VPUser *U) {
2837+
auto *R = dyn_cast<VPRecipeBase>(U);
2838+
return R && classof(R);
2839+
}
2840+
2841+
/// Generate the reduction in the loop
2842+
void execute(VPTransformState &State) override;
2843+
2844+
/// Return the cost of VPMulAccRecipe.
2845+
InstructionCost computeCost(ElementCount VF,
2846+
VPCostContext &Ctx) const override;
2847+
2848+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2849+
/// Print the recipe.
2850+
void print(raw_ostream &O, const Twine &Indent,
2851+
VPSlotTracker &SlotTracker) const override;
2852+
#endif
2853+
2854+
/// Return the recurrence descriptor for the in-loop reduction.
2855+
const RecurrenceDescriptor &getRecurrenceDescriptor() const {
2856+
return RdxDesc;
2857+
}
2858+
/// Return true if the in-loop reduction is ordered.
2859+
bool isOrdered() const { return IsOrdered; };
2860+
/// Return true if the in-loop reduction is conditional.
2861+
bool isConditional() const { return IsConditional; };
2862+
/// The VPValue of the scalar Chain being accumulated.
2863+
VPValue *getChainOp() const { return getOperand(0); }
2864+
/// The VPValue of the first vector operand (operand 1).
/// NOTE(review): no accessor is visible here for the second vector operand
/// (operand 2) — confirm whether one is needed.
2865+
VPValue *getVecOp() const { return getOperand(1); }
2866+
/// The VPValue of the condition for the block: the last operand if the
/// recipe is conditional, nullptr otherwise.
2867+
VPValue *getCondOp() const {
2868+
return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2869+
}
2870+
/// Return the type after the outer extend.
Type *getResultTy() const { return ResultTy; };
2871+
/// Return the opcode of the outer extend.
Instruction::CastOps getOuterExtOpcode() const { return OuterExtOp; };
2872+
/// Return the opcode recorded for the inner extends.
Instruction::CastOps getInnerExtOpcode() const { return InnerExtOp; };
2873+
};
2874+
26582875
/// VPReplicateRecipe replicates a given instruction producing multiple scalar
26592876
/// copies of the original scalar type, one per lane, instead of producing a
26602877
/// single copy of widened type for all lanes. If the instruction is known to be

0 commit comments

Comments
 (0)