@@ -859,6 +859,8 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
859
859
case VPRecipeBase::VPInstructionSC:
860
860
case VPRecipeBase::VPReductionEVLSC:
861
861
case VPRecipeBase::VPReductionSC:
862
+ case VPRecipeBase::VPMulAccSC:
863
+ case VPRecipeBase::VPExtendedReductionSC:
862
864
case VPRecipeBase::VPReplicateSC:
863
865
case VPRecipeBase::VPScalarIVStepsSC:
864
866
case VPRecipeBase::VPVectorPointerSC:
@@ -2655,6 +2657,221 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
2655
2657
}
2656
2658
};
2657
2659
2660
+ // / A recipe to represent inloop extended reduction operations, performing a
2661
+ // / reduction on a vector operand into a scalar value, and adding the result to
2662
+ // / a chain. This recipe is high level abstract which will generate
2663
+ // / VPReductionRecipe and VPWidenCastRecipe before execution. The Operands are
2664
+ // / {ChainOp, VecOp, [Condition]}.
2665
+ class VPExtendedReductionRecipe : public VPSingleDefRecipe {
2666
+ // / The recurrence decriptor for the reduction in question.
2667
+ const RecurrenceDescriptor &RdxDesc;
2668
+ bool IsOrdered;
2669
+ // / Whether the reduction is conditional.
2670
+ bool IsConditional = false ;
2671
+ // / Type after extend.
2672
+ Type *ResultTy;
2673
+ Instruction::CastOps ExtOp;
2674
+ CastInst *CastInstr;
2675
+ bool IsZExt;
2676
+
2677
+ protected:
2678
+ VPExtendedReductionRecipe (const unsigned char SC,
2679
+ const RecurrenceDescriptor &R, Instruction *RedI,
2680
+ Instruction::CastOps ExtOp, CastInst *CastI,
2681
+ ArrayRef<VPValue *> Operands, VPValue *CondOp,
2682
+ bool IsOrdered, Type *ResultTy)
2683
+ : VPSingleDefRecipe(SC, Operands, RedI), RdxDesc(R), IsOrdered(IsOrdered),
2684
+ ResultTy (ResultTy), ExtOp(ExtOp), CastInstr(CastI) {
2685
+ if (CondOp) {
2686
+ IsConditional = true ;
2687
+ addOperand (CondOp);
2688
+ }
2689
+ IsZExt = ExtOp == Instruction::CastOps::ZExt;
2690
+ }
2691
+
2692
+ public:
2693
+ VPExtendedReductionRecipe (const RecurrenceDescriptor &R, Instruction *RedI,
2694
+ CastInst *CastI, VPValue *ChainOp, VPValue *VecOp,
2695
+ VPValue *CondOp, bool IsOrdered, Type *ResultTy)
2696
+ : VPExtendedReductionRecipe(VPDef::VPExtendedReductionSC, R, RedI,
2697
+ CastI->getOpcode (), CastI,
2698
+ ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2699
+ IsOrdered, ResultTy) {}
2700
+
2701
+ VPExtendedReductionRecipe (VPReductionRecipe *Red, VPWidenCastRecipe *Ext)
2702
+ : VPExtendedReductionRecipe(
2703
+ VPDef::VPExtendedReductionSC, Red->getRecurrenceDescriptor (),
2704
+ Red->getUnderlyingInstr(), Ext->getOpcode(),
2705
+ cast<CastInst>(Ext->getUnderlyingInstr ()),
2706
+ ArrayRef<VPValue *>({Red->getChainOp (), Ext->getOperand (0 )}),
2707
+ Red->getCondOp(), Red->isOrdered(), Ext->getResultType()) {}
2708
+
2709
+ ~VPExtendedReductionRecipe () override = default ;
2710
+
2711
+ VPExtendedReductionRecipe *clone () override {
2712
+ llvm_unreachable (" Not implement yet" );
2713
+ }
2714
+
2715
+ static inline bool classof (const VPRecipeBase *R) {
2716
+ return R->getVPDefID () == VPDef::VPExtendedReductionSC;
2717
+ }
2718
+
2719
+ static inline bool classof (const VPUser *U) {
2720
+ auto *R = dyn_cast<VPRecipeBase>(U);
2721
+ return R && classof (R);
2722
+ }
2723
+
2724
+ // / Generate the reduction in the loop
2725
+ void execute (VPTransformState &State) override {
2726
+ llvm_unreachable (" VPExtendedReductionRecipe should be transform to "
2727
+ " VPExtendedRecipe + VPReductionRecipe before execution." );
2728
+ };
2729
+
2730
+ // / Return the cost of VPExtendedReductionRecipe.
2731
+ InstructionCost computeCost (ElementCount VF,
2732
+ VPCostContext &Ctx) const override ;
2733
+
2734
+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2735
+ // / Print the recipe.
2736
+ void print (raw_ostream &O, const Twine &Indent,
2737
+ VPSlotTracker &SlotTracker) const override ;
2738
+ #endif
2739
+
2740
+ // / Return the recurrence decriptor for the in-loop reduction.
2741
+ const RecurrenceDescriptor &getRecurrenceDescriptor () const {
2742
+ return RdxDesc;
2743
+ }
2744
+ // / Return true if the in-loop reduction is ordered.
2745
+ bool isOrdered () const { return IsOrdered; };
2746
+ // / Return true if the in-loop reduction is conditional.
2747
+ bool isConditional () const { return IsConditional; };
2748
+ // / The VPValue of the scalar Chain being accumulated.
2749
+ VPValue *getChainOp () const { return getOperand (0 ); }
2750
+ // / The VPValue of the vector value to be extended and reduced.
2751
+ VPValue *getVecOp () const { return getOperand (1 ); }
2752
+ // / The VPValue of the condition for the block.
2753
+ VPValue *getCondOp () const {
2754
+ return isConditional () ? getOperand (getNumOperands () - 1 ) : nullptr ;
2755
+ }
2756
+ Type *getResultType () const { return ResultTy; };
2757
+ Instruction::CastOps getExtOpcode () const { return ExtOp; };
2758
+ CastInst *getExtInstr () const { return CastInstr; };
2759
+ };
2760
+
2761
+ // / A recipe to represent inloop MulAccreduction operations, performing a
2762
+ // / reduction on a vector operand into a scalar value, and adding the result to
2763
+ // / a chain. This recipe is high level abstract which will generate
2764
+ // / VPReductionRecipe VPWidenRecipe(mul)and VPWidenCastRecipe before execution.
2765
+ // / The Operands are {ChainOp, VecOp1, VecOp2, [Condition]}.
2766
+ class VPMulAccRecipe : public VPSingleDefRecipe {
2767
+ // / The recurrence decriptor for the reduction in question.
2768
+ const RecurrenceDescriptor &RdxDesc;
2769
+ bool IsOrdered;
2770
+ // / Whether the reduction is conditional.
2771
+ bool IsConditional = false ;
2772
+ // / Type after extend.
2773
+ Type *ResultTy;
2774
+ // / Type for mul.
2775
+ Type *MulTy;
2776
+ // / reduce.add(OuterExt(mul(InnerExt(), InnerExt())))
2777
+ Instruction::CastOps OuterExtOp;
2778
+ Instruction::CastOps InnerExtOp;
2779
+
2780
+ Instruction *MulI;
2781
+ Instruction *OuterExtI;
2782
+ Instruction *InnerExt0I;
2783
+ Instruction *InnerExt1I;
2784
+
2785
+ protected:
2786
+ VPMulAccRecipe (const unsigned char SC, const RecurrenceDescriptor &R,
2787
+ Instruction *RedI, Instruction::CastOps OuterExtOp,
2788
+ Instruction *OuterExtI, Instruction *MulI,
2789
+ Instruction::CastOps InnerExtOp, Instruction *InnerExt0I,
2790
+ Instruction *InnerExt1I, ArrayRef<VPValue *> Operands,
2791
+ VPValue *CondOp, bool IsOrdered, Type *ResultTy, Type *MulTy)
2792
+ : VPSingleDefRecipe(SC, Operands, RedI), RdxDesc(R), IsOrdered(IsOrdered),
2793
+ ResultTy (ResultTy), MulTy(MulTy), OuterExtOp(OuterExtOp),
2794
+ InnerExtOp(InnerExtOp), MulI(MulI), OuterExtI(OuterExtI),
2795
+ InnerExt0I(InnerExt0I), InnerExt1I(InnerExt1I) {
2796
+ if (CondOp) {
2797
+ IsConditional = true ;
2798
+ addOperand (CondOp);
2799
+ }
2800
+ }
2801
+
2802
+ public:
2803
+ VPMulAccRecipe (const RecurrenceDescriptor &R, Instruction *RedI,
2804
+ Instruction *OuterExt, Instruction *Mul,
2805
+ Instruction *InnerExt0, Instruction *InnerExt1,
2806
+ VPValue *ChainOp, VPValue *InnerExt0Op, VPValue *InnerExt1Op,
2807
+ VPValue *CondOp, bool IsOrdered, Type *ResultTy, Type *MulTy)
2808
+ : VPMulAccRecipe(
2809
+ VPDef::VPMulAccSC, R, RedI, cast<CastInst>(OuterExt)->getOpcode(),
2810
+ OuterExt, Mul, cast<CastInst>(InnerExt0)->getOpcode(), InnerExt0,
2811
+ InnerExt1, ArrayRef<VPValue *>({ChainOp, InnerExt0Op, InnerExt1Op}),
2812
+ CondOp, IsOrdered, ResultTy, MulTy) {}
2813
+
2814
+ VPMulAccRecipe (VPReductionRecipe *Red, VPWidenCastRecipe *OuterExt,
2815
+ VPWidenRecipe *Mul, VPWidenCastRecipe *InnerExt0,
2816
+ VPWidenCastRecipe *InnerExt1)
2817
+ : VPMulAccRecipe(
2818
+ VPDef::VPMulAccSC, Red->getRecurrenceDescriptor (),
2819
+ Red->getUnderlyingInstr(), OuterExt->getOpcode(),
2820
+ OuterExt->getUnderlyingInstr(), Mul->getUnderlyingInstr(),
2821
+ InnerExt0->getOpcode(), InnerExt0->getUnderlyingInstr(),
2822
+ InnerExt1->getUnderlyingInstr(),
2823
+ ArrayRef<VPValue *>({Red->getChainOp (), InnerExt0->getOperand (0 ),
2824
+ InnerExt1->getOperand (0 )}),
2825
+ Red->getCondOp(), Red->isOrdered(), OuterExt->getResultType(),
2826
+ InnerExt0->getResultType()) {}
2827
+
2828
+ ~VPMulAccRecipe () override = default ;
2829
+
2830
+ VPMulAccRecipe *clone () override { llvm_unreachable (" Not implement yet" ); }
2831
+
2832
+ static inline bool classof (const VPRecipeBase *R) {
2833
+ return R->getVPDefID () == VPRecipeBase::VPMulAccSC;
2834
+ }
2835
+
2836
+ static inline bool classof (const VPUser *U) {
2837
+ auto *R = dyn_cast<VPRecipeBase>(U);
2838
+ return R && classof (R);
2839
+ }
2840
+
2841
+ // / Generate the reduction in the loop
2842
+ void execute (VPTransformState &State) override ;
2843
+
2844
+ // / Return the cost of VPExtendedReductionRecipe.
2845
+ InstructionCost computeCost (ElementCount VF,
2846
+ VPCostContext &Ctx) const override ;
2847
+
2848
+ #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2849
+ // / Print the recipe.
2850
+ void print (raw_ostream &O, const Twine &Indent,
2851
+ VPSlotTracker &SlotTracker) const override ;
2852
+ #endif
2853
+
2854
+ // / Return the recurrence decriptor for the in-loop reduction.
2855
+ const RecurrenceDescriptor &getRecurrenceDescriptor () const {
2856
+ return RdxDesc;
2857
+ }
2858
+ // / Return true if the in-loop reduction is ordered.
2859
+ bool isOrdered () const { return IsOrdered; };
2860
+ // / Return true if the in-loop reduction is conditional.
2861
+ bool isConditional () const { return IsConditional; };
2862
+ // / The VPValue of the scalar Chain being accumulated.
2863
+ VPValue *getChainOp () const { return getOperand (0 ); }
2864
+ // / The VPValue of the vector value to be extended and reduced.
2865
+ VPValue *getVecOp () const { return getOperand (1 ); }
2866
+ // / The VPValue of the condition for the block.
2867
+ VPValue *getCondOp () const {
2868
+ return isConditional () ? getOperand (getNumOperands () - 1 ) : nullptr ;
2869
+ }
2870
+ Type *getResultTy () const { return ResultTy; };
2871
+ Instruction::CastOps getOuterExtOpcode () const { return OuterExtOp; };
2872
+ Instruction::CastOps getInnerExtOpcode () const { return InnerExtOp; };
2873
+ };
2874
+
2658
2875
// / VPReplicateRecipe replicates a given instruction producing multiple scalar
2659
2876
// / copies of the original scalar type, one per lane, instead of producing a
2660
2877
// / single copy of widened type for all lanes. If the instruction is known to be
0 commit comments