
Commit d88990b

[SelectionDAG] Add BUILD_VECTOR support to computeKnownBits and SimplifyDemandedBits
Add the ability for computeKnownBits and SimplifyDemandedBits to extract the known zero/one bits from a BUILD_VECTOR, returning the known bits that are shared by every vector element. This is an initial step towards determining the sign bits of a vector (PR29079).

Differential Revision: https://reviews.llvm.org/D24253

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280927 91177308-0d34-0410-b5e6-96231b3b80d8
Parent: 707719e

5 files changed: 57 additions, 14 deletions

lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 20 additions & 0 deletions
@@ -2016,6 +2016,26 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
     KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
     KnownZero = ~KnownOne;
     break;
+  case ISD::BUILD_VECTOR:
+    // Collect the known bits that are shared by every vector element.
+    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+    for (SDValue SrcOp : Op->ops()) {
+      computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
+
+      // BUILD_VECTOR can implicitly truncate sources, we must handle this.
+      if (SrcOp.getValueSizeInBits() != BitWidth) {
+        assert(SrcOp.getValueSizeInBits() > BitWidth &&
+               "Expected BUILD_VECTOR implicit truncation");
+        KnownOne2 = KnownOne2.trunc(BitWidth);
+        KnownZero2 = KnownZero2.trunc(BitWidth);
+      }
+
+      // Known bits are the values that are shared by every element.
+      // TODO: support per-element known bits.
+      KnownOne &= KnownOne2;
+      KnownZero &= KnownZero2;
+    }
+    break;
   case ISD::AND:
     // If either the LHS or the RHS are Zero, the result is zero.
     computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
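To see what the new case computes, here is a minimal standalone model of the same intersection loop, using plain 64-bit masks in place of APInt. The element values and bit width are hypothetical, chosen only to illustrate the shared-known-bits idea, and the sources are treated as constants rather than recursed into as computeKnownBits does.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Standalone model of the BUILD_VECTOR case above. Elements are treated as
// constants, so every bit of each element is "known": KnownOne is the value
// and KnownZero is its complement.
struct KnownBits { uint64_t Zero, One; };

static KnownBits knownBitsOfBuildVector(const std::vector<uint64_t> &Elts,
                                        unsigned BitWidth) {
  // Mask models the implicit truncation of sources wider than the element.
  uint64_t Mask = (BitWidth >= 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  KnownBits Known = {Mask, Mask}; // start from all-ones, like getAllOnesValue
  for (uint64_t E : Elts) {
    Known.One &= E & Mask;   // a bit stays known one/zero only if it is
    Known.Zero &= ~E & Mask; // shared by every element
  }
  return Known;
}

int main() {
  // Hypothetical v4i16 constant vector <0xF1, 0xF3, 0xF5, 0xF7>.
  KnownBits K = knownBitsOfBuildVector({0xF1, 0xF3, 0xF5, 0xF7}, 16);
  printf("KnownOne  = 0x%04llx\n", (unsigned long long)K.One);  // 0x00f1
  printf("KnownZero = 0x%04llx\n", (unsigned long long)K.Zero); // 0xff08
}
```

Only the bits that agree across all four lanes survive the intersection: the low nibbles differ, so only bit 3 and the high byte remain known zero, while 0xF1's bits common to every element remain known one.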

lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 27 additions & 0 deletions
@@ -468,6 +468,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
     KnownZero = ~KnownOne;
     return false; // Don't fall through, will infinitely loop.
+  case ISD::BUILD_VECTOR:
+    // Collect the known bits that are shared by every constant vector element.
+    KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+    for (SDValue SrcOp : Op->ops()) {
+      if (!isa<ConstantSDNode>(SrcOp)) {
+        // We can only handle all constant values - bail out with no known bits.
+        KnownZero = KnownOne = APInt(BitWidth, 0);
+        return false;
+      }
+      KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
+      KnownZero2 = ~KnownOne2;
+
+      // BUILD_VECTOR can implicitly truncate sources, we must handle this.
+      if (KnownOne2.getBitWidth() != BitWidth) {
+        assert(KnownOne2.getBitWidth() > BitWidth &&
+               KnownZero2.getBitWidth() > BitWidth &&
+               "Expected BUILD_VECTOR implicit truncation");
+        KnownOne2 = KnownOne2.trunc(BitWidth);
+        KnownZero2 = KnownZero2.trunc(BitWidth);
+      }
+
+      // Known bits are the values that are shared by every element.
+      // TODO: support per-element known bits.
+      KnownOne &= KnownOne2;
+      KnownZero &= KnownZero2;
+    }
+    return false; // Don't fall through, will infinitely loop.
   case ISD::AND:
     // If the RHS is a constant, check to see if the LHS would be zero without
     // using the bits from the RHS. Below, we use knowledge about the RHS to
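The SimplifyDemandedBits version is stricter than the computeKnownBits one: it does not recurse into operands, and it handles only all-constant vectors, otherwise bailing out with no known bits. A sketch of that bail-out, extending the model above; again a hedged illustration with hypothetical values, not the LLVM API.

```cpp
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct KnownBits { uint64_t Zero, One; };

// A non-constant element is modelled as std::nullopt: as in the case above,
// the analysis then gives up and reports no known bits at all.
static KnownBits knownBitsConstantsOnly(
    const std::vector<std::optional<uint64_t>> &Elts, unsigned BitWidth) {
  uint64_t Mask = (BitWidth >= 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  KnownBits Known = {Mask, Mask};
  for (const auto &E : Elts) {
    if (!E)
      return {0, 0}; // non-constant element: nothing is known
    Known.One &= *E & Mask;   // implicit truncation to the element width,
    Known.Zero &= ~*E & Mask; // then intersect as before
  }
  return Known;
}

int main() {
  // One non-constant lane is enough to lose all information.
  KnownBits K = knownBitsConstantsOnly({42, std::nullopt, 42, 42}, 16);
  printf("KnownZero = 0x%llx, KnownOne = 0x%llx\n",
         (unsigned long long)K.Zero, (unsigned long long)K.One);
}
```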

test/CodeGen/AMDGPU/load-constant-i16.ll

Lines changed: 4 additions & 3 deletions
@@ -138,7 +138,7 @@ define void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x
 ; v2i16 is naturally 4 byte aligned
 ; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
 ; TODO: This should use DST, but for some there are redundant MOVs
-; EG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
+; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
 ; EG: 16
 define void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(2)* %in) #0 {
   %load = load <2 x i16>, <2 x i16> addrspace(2)* %in
@@ -212,9 +212,10 @@ entry:
 ; v4i16 is naturally 8 byte aligned
 ; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1
 ; TODO: These should use DST, but for some there are redundant MOVs
-; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
-; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
+; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
 ; EG-DAG: 16
+; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
+; EG-DAG: AND_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
 ; EG-DAG: 16
 define void @constant_constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(2)* %in) #0 {
   %load = load <4 x i16>, <4 x i16> addrspace(2)* %in

test/CodeGen/AMDGPU/load-global-i16.ll

Lines changed: 4 additions & 3 deletions
@@ -147,7 +147,7 @@ define void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i

 ; EG: VTX_READ_32 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
 ; TODO: This should use DST, but for some there are redundant MOVs
-; EG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
+; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
 ; EG: 16
 define void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 {
   %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
@@ -219,9 +219,10 @@ entry:

 ; EG: VTX_READ_64 [[DST:T[0-9]\.XY]], {{T[0-9].[XYZW]}}, 0, #1
 ; TODO: These should use DST, but for some there are redundant MOVs
-; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{PV.[XYZW]}}, literal
-; EG-DAG: LSHR {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
+; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
 ; EG-DAG: 16
+; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
+; EG-DAG: AND_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{T[0-9].[XYZW]}}, literal
 ; EG-DAG: 16
 define void @global_global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 {
   %load = load <4 x i16>, <4 x i16> addrspace(1)* %in

test/CodeGen/X86/combine-and.ll

Lines changed: 2 additions & 8 deletions
@@ -210,10 +210,7 @@ define <4 x i32> @and_or_v4i32(<4 x i32> %a0) {
 define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
 ; CHECK-LABEL: and_or_zext_v2i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = zext <2 x i32> %a0 to <2 x i64>
   %2 = or <2 x i64> %1, <i64 1, i64 1>
@@ -224,10 +221,7 @@ define <2 x i64> @and_or_zext_v2i32(<2 x i32> %a0) {
 define <4 x i32> @and_or_zext_v4i16(<4 x i16> %a0) {
 ; CHECK-LABEL: and_or_zext_v4i16:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-NEXT: xorps %xmm0, %xmm0
 ; CHECK-NEXT: retq
   %1 = zext <4 x i16> %a0 to <4 x i32>
   %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
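Why these checks collapse to a single zeroing `xorps`: the zext makes the high half of every element known zero, and with the new BUILD_VECTOR case the constant `or`/`and` operands are visible to the demanded-bits analysis, which is presumably what lets the combiner prove the final `and` yields an all-zero vector. Below is a small arithmetic check of that reasoning per i64 element; the `and` mask is elided from the diff context above, so the value used here is assumed purely for illustration.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // zext i32 -> i64: the high 32 bits of each element are known zero.
  uint64_t KnownZero = 0xFFFFFFFF00000000ULL;
  // or <i64 1, i64 1>: bit 0 becomes known one; the high bits stay known zero.
  uint64_t KnownOne = 0x1ULL;
  (void)KnownOne;
  // Hypothetical and-mask lying entirely within the known-zero bits.
  uint64_t AndMask = 0x0000000100000000ULL;
  // Every bit the mask demands is already known zero, so the result is a
  // constant zero vector, which the backend emits as xorps %xmm0, %xmm0.
  assert((AndMask & ~KnownZero) == 0);
  printf("and result is known zero\n");
  return 0;
}
```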
