Skip to content

Commit 1ef62cb

Browse files
committed
[X86] SimplifyDemandedVectorEltsForTargetNode - add PSADBW handling
Peek through PSADBW operands to handle non-demanded elements.
1 parent 005fc11 commit 1ef62cb

File tree

4 files changed

+37
-16
lines changed

4 files changed

+37
-16
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39358,6 +39358,31 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3935839358
KnownZero = LHSZero | RHSZero;
3935939359
break;
3936039360
}
39361+
case X86ISD::PSADBW: {
39362+
SDValue LHS = Op.getOperand(0);
39363+
SDValue RHS = Op.getOperand(1);
39364+
assert(VT.getScalarType() == MVT::i64 &&
39365+
LHS.getValueType() == RHS.getValueType() &&
39366+
LHS.getValueType().getScalarType() == MVT::i8 &&
39367+
"Unexpected PSADBW types");
39368+
39369+
// Aggressively peek through ops to get at the demanded elts.
39370+
if (!DemandedElts.isAllOnesValue()) {
39371+
unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
39372+
APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
39373+
SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
39374+
LHS, DemandedSrcElts, TLO.DAG, Depth + 1);
39375+
SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
39376+
RHS, DemandedSrcElts, TLO.DAG, Depth + 1);
39377+
if (NewLHS || NewRHS) {
39378+
NewLHS = NewLHS ? NewLHS : LHS;
39379+
NewRHS = NewRHS ? NewRHS : RHS;
39380+
return TLO.CombineTo(
39381+
Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
39382+
}
39383+
}
39384+
break;
39385+
}
3936139386
case X86ISD::VSHL:
3936239387
case X86ISD::VSRL:
3936339388
case X86ISD::VSRA: {

llvm/test/CodeGen/X86/psadbw.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,15 @@ define <2 x i64> @combine_psadbw_shift(<16 x i8> %0, <16 x i8> %1) {
1717
define i64 @combine_psadbw_demandedelt(<16 x i8> %0, <16 x i8> %1) {
1818
; X86-LABEL: combine_psadbw_demandedelt:
1919
; X86: # %bb.0:
20-
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
21-
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
22-
; X86-NEXT: psadbw %xmm0, %xmm1
23-
; X86-NEXT: movd %xmm1, %eax
20+
; X86-NEXT: psadbw %xmm1, %xmm0
21+
; X86-NEXT: movd %xmm0, %eax
2422
; X86-NEXT: xorl %edx, %edx
2523
; X86-NEXT: retl
2624
;
2725
; X64-LABEL: combine_psadbw_demandedelt:
2826
; X64: # %bb.0:
29-
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
30-
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,3,2]
31-
; X64-NEXT: psadbw %xmm0, %xmm1
32-
; X64-NEXT: movq %xmm1, %rax
27+
; X64-NEXT: psadbw %xmm1, %xmm0
28+
; X64-NEXT: movq %xmm0, %rax
3329
; X64-NEXT: retq
3430
%3 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
3531
%4 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>

llvm/test/CodeGen/X86/sad.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,7 @@ define dso_local i32 @sad_nonloop_8i8(<8 x i8>* nocapture readonly %p, i64, <8 x
733733
; AVX: # %bb.0:
734734
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
735735
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
736-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
736+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
737737
; AVX-NEXT: vmovd %xmm0, %eax
738738
; AVX-NEXT: retq
739739
%v1 = load <8 x i8>, <8 x i8>* %p, align 1

llvm/test/CodeGen/X86/sad_variations.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ define i32 @sad8_32bit_icmp_sge(i8* nocapture readonly %cur, i8* nocapture reado
1818
; AVX: # %bb.0: # %entry
1919
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
2020
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
21-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
21+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
2222
; AVX-NEXT: vmovd %xmm0, %eax
2323
; AVX-NEXT: retq
2424

@@ -60,7 +60,7 @@ define i32 @sad8_32bit_icmp_sgt(i8* nocapture readonly %cur, i8* nocapture reado
6060
; AVX: # %bb.0: # %entry
6161
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
6262
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
63-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
63+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
6464
; AVX-NEXT: vmovd %xmm0, %eax
6565
; AVX-NEXT: retq
6666
entry:
@@ -101,7 +101,7 @@ define i32 @sad8_32bit_icmp_sle(i8* nocapture readonly %cur, i8* nocapture reado
101101
; AVX: # %bb.0: # %entry
102102
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
103103
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
104-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
104+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
105105
; AVX-NEXT: vmovd %xmm0, %eax
106106
; AVX-NEXT: retq
107107
entry:
@@ -142,7 +142,7 @@ define i32 @sad8_32bit_icmp_slt(i8* nocapture readonly %cur, i8* nocapture reado
142142
; AVX: # %bb.0: # %entry
143143
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
144144
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
145-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
145+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
146146
; AVX-NEXT: vmovd %xmm0, %eax
147147
; AVX-NEXT: retq
148148
entry:
@@ -183,7 +183,7 @@ define i64 @sad8_64bit_icmp_sext_slt(i8* nocapture readonly %cur, i8* nocapture
183183
; AVX: # %bb.0: # %entry
184184
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
185185
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
186-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
186+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
187187
; AVX-NEXT: vmovq %xmm0, %rax
188188
; AVX-NEXT: retq
189189
entry:
@@ -224,7 +224,7 @@ define i64 @sad8_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* nocapture
224224
; AVX: # %bb.0: # %entry
225225
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
226226
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
227-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
227+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
228228
; AVX-NEXT: vmovq %xmm0, %rax
229229
; AVX-NEXT: retq
230230
entry:
@@ -265,7 +265,7 @@ define i64 @sad8_early_64bit_icmp_zext_slt(i8* nocapture readonly %cur, i8* noca
265265
; AVX: # %bb.0: # %entry
266266
; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
267267
; AVX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
268-
; AVX-NEXT: vpsadbw %xmm0, %xmm1, %xmm0
268+
; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0
269269
; AVX-NEXT: vmovq %xmm0, %rax
270270
; AVX-NEXT: retq
271271
entry:

0 commit comments

Comments
 (0)