Skip to content

Commit a2f00e1

Browse files
authored
[RISCV] Add fixed-length disjoint-or patterns for vwadd[u].v{v,x} (#136824)
This is the fixed-length equivalent of #136716. The pattern we need to match is ({s,z}ext_vl (or_vl disjoint a, b)). Only an or_vl with an undef passthru is matched, which lets us ignore the or_vl's own mask and VL and take them from the enclosing {s,z}ext_vl instead. A riscv_or_vl_is_add_oneuse PatFrag is added to mirror or_is_add in RISCVInstrInfo.td.
1 parent 03c2862 commit a2f00e1

File tree

2 files changed

+53
-12
lines changed

2 files changed

+53
-12
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -497,6 +497,16 @@ let HasOneUse = 1 in {
497497
node:$E),
498498
(riscv_add_vl node:$A, node:$B, node:$C,
499499
node:$D, node:$E)>;
500+
def riscv_or_vl_is_add_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D,
501+
node:$E),
502+
(riscv_or_vl node:$A, node:$B, node:$C,
503+
node:$D, node:$E), [{
504+
if (N->getFlags().hasDisjoint())
505+
return true;
506+
KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0);
507+
KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0);
508+
return KnownBits::haveNoCommonBitsSet(Known0, Known1);
509+
}]>;
500510
def riscv_sub_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D,
501511
node:$E),
502512
(riscv_sub_vl node:$A, node:$B, node:$C,
@@ -2016,6 +2026,41 @@ foreach vtiToWti = AllWidenableIntVectors in {
20162026
}
20172027
}
20182028

2029+
// DAGCombiner::hoistLogicOpWithSameOpcodeHands may hoist disjoint ors
2030+
// to (ext (or disjoint (a, b)))
2031+
multiclass VPatWidenOrDisjointVL_VV_VX<SDNode extop, string instruction_name> {
2032+
foreach vtiToWti = AllWidenableIntVectors in {
2033+
defvar vti = vtiToWti.Vti;
2034+
defvar wti = vtiToWti.Wti;
2035+
let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
2036+
GetVTypePredicates<wti>.Predicates) in {
2037+
def : Pat<(wti.Vector
2038+
(extop
2039+
(vti.Vector
2040+
(riscv_or_vl_is_add_oneuse
2041+
vti.RegClass:$rs2, vti.RegClass:$rs1,
2042+
undef, srcvalue, srcvalue)),
2043+
VMV0:$vm, VLOpFrag)),
2044+
(!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
2045+
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
2046+
vti.RegClass:$rs1, VMV0:$vm, GPR:$vl, vti.Log2SEW, TA_MA)>;
2047+
def : Pat<(wti.Vector
2048+
(extop
2049+
(vti.Vector
2050+
(riscv_or_vl_is_add_oneuse
2051+
vti.RegClass:$rs2, (SplatPat (XLenVT GPR:$rs1)),
2052+
undef, srcvalue, srcvalue)),
2053+
VMV0:$vm, VLOpFrag)),
2054+
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK")
2055+
(wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
2056+
GPR:$rs1, VMV0:$vm, GPR:$vl, vti.Log2SEW, TA_MA)>;
2057+
}
2058+
}
2059+
}
2060+
2061+
defm : VPatWidenOrDisjointVL_VV_VX<riscv_sext_vl, "PseudoVWADD">;
2062+
defm : VPatWidenOrDisjointVL_VV_VX<riscv_zext_vl, "PseudoVWADDU">;
2063+
20192064
// 11.3. Vector Integer Extension
20202065
defm : VPatExtendVL_V<riscv_zext_vl, "PseudoVZEXT", "VF2",
20212066
AllFractionableVF2IntVectors>;

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -899,9 +899,8 @@ define <4 x i32> @vwaddu_vv_disjoint_or(<4 x i16> %x.i16, <4 x i16> %y.i16) {
899899
; CHECK-LABEL: vwaddu_vv_disjoint_or:
900900
; CHECK: # %bb.0:
901901
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
902-
; CHECK-NEXT: vor.vv v9, v8, v9
903-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
904-
; CHECK-NEXT: vzext.vf2 v8, v9
902+
; CHECK-NEXT: vwaddu.vv v10, v8, v9
903+
; CHECK-NEXT: vmv1r.v v8, v10
905904
; CHECK-NEXT: ret
906905
%x.i32 = zext <4 x i16> %x.i16 to <4 x i32>
907906
%y.i32 = zext <4 x i16> %y.i16 to <4 x i32>
@@ -913,9 +912,8 @@ define <4 x i32> @vwadd_vv_disjoint_or(<4 x i16> %x.i16, <4 x i16> %y.i16) {
913912
; CHECK-LABEL: vwadd_vv_disjoint_or:
914913
; CHECK: # %bb.0:
915914
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
916-
; CHECK-NEXT: vor.vv v9, v8, v9
917-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
918-
; CHECK-NEXT: vsext.vf2 v8, v9
915+
; CHECK-NEXT: vwadd.vv v10, v8, v9
916+
; CHECK-NEXT: vmv1r.v v8, v10
919917
; CHECK-NEXT: ret
920918
%x.i32 = sext <4 x i16> %x.i16 to <4 x i32>
921919
%y.i32 = sext <4 x i16> %y.i16 to <4 x i32>
@@ -927,9 +925,8 @@ define <4 x i32> @vwaddu_vx_disjoint_or(<4 x i16> %x.i16, i16 %y.i16) {
927925
; CHECK-LABEL: vwaddu_vx_disjoint_or:
928926
; CHECK: # %bb.0:
929927
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
930-
; CHECK-NEXT: vor.vx v9, v8, a0
931-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
932-
; CHECK-NEXT: vzext.vf2 v8, v9
928+
; CHECK-NEXT: vwaddu.vx v9, v8, a0
929+
; CHECK-NEXT: vmv1r.v v8, v9
933930
; CHECK-NEXT: ret
934931
%x.i32 = zext <4 x i16> %x.i16 to <4 x i32>
935932
%y.head = insertelement <4 x i16> poison, i16 %y.i16, i32 0
@@ -943,9 +940,8 @@ define <4 x i32> @vwadd_vx_disjoint_or(<4 x i16> %x.i16, i16 %y.i16) {
943940
; CHECK-LABEL: vwadd_vx_disjoint_or:
944941
; CHECK: # %bb.0:
945942
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
946-
; CHECK-NEXT: vor.vx v9, v8, a0
947-
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
948-
; CHECK-NEXT: vsext.vf2 v8, v9
943+
; CHECK-NEXT: vwadd.vx v9, v8, a0
944+
; CHECK-NEXT: vmv1r.v v8, v9
949945
; CHECK-NEXT: ret
950946
%x.i32 = sext <4 x i16> %x.i16 to <4 x i32>
951947
%y.head = insertelement <4 x i16> poison, i16 %y.i16, i32 0

0 commit comments

Comments
 (0)