-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[AMDGPU] Compute GISel KnownBits for S_BFE instructions #141588
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Pierre-vh
wants to merge
5
commits into
main
Choose a base branch
from
users/pierre-vh/gi-kb-sbfe
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+309
−0
Open
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
2e18e5d
[AMDGPU] Compute GISel KnownBits for S_BFE instructions
Pierre-vh 2fae5d5
Fixes + add tests
Pierre-vh 7a7b474
Address comment and add more tests
Pierre-vh 83cc936
Propagate depth correctly
Pierre-vh 1d1567f
add assert
Pierre-vh File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16502,12 +16502,68 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST, | |
Known.Zero.setHighBits(llvm::countl_zero(MaxValue)); | ||
} | ||
|
||
static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT, | ||
KnownBits &Known, const APInt &DemandedElts, | ||
unsigned BFEWidth, bool SExt, unsigned Depth) { | ||
const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo(); | ||
const MachineOperand &Src1 = MI.getOperand(2); | ||
|
||
unsigned Src1Cst = 0; | ||
if (Src1.isImm()) { | ||
Src1Cst = Src1.getImm(); | ||
} else if (Src1.isReg()) { | ||
auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI); | ||
if (!Cst) | ||
return; | ||
Src1Cst = Cst->Value.getZExtValue(); | ||
} else { | ||
return; | ||
} | ||
|
||
// Offset is at bits [4:0] for 32 bit, [5:0] for 64 bit. | ||
// Width is always [22:16]. | ||
const unsigned Offset = | ||
Src1Cst & maskTrailingOnes<unsigned>((BFEWidth == 32) ? 5 : 6); | ||
const unsigned Width = (Src1Cst >> 16) & maskTrailingOnes<unsigned>(6); | ||
|
||
if (Width >= BFEWidth) { | ||
assert(false && "Invalid S_BFE"); | ||
return; | ||
} | ||
|
||
VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, | ||
Depth + 1); | ||
|
||
Known.Zero = Known.Zero.lshr(Offset); | ||
Known.One = Known.One.lshr(Offset); | ||
|
||
Known = Known.trunc(Width); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similarly this could assert if the value in src1 encodes a width > 32. |
||
|
||
if (SExt) | ||
Known = Known.sext(BFEWidth); | ||
else | ||
Known = Known.zext(BFEWidth); | ||
} | ||
|
||
void SITargetLowering::computeKnownBitsForTargetInstr( | ||
GISelValueTracking &VT, Register R, KnownBits &Known, | ||
const APInt &DemandedElts, const MachineRegisterInfo &MRI, | ||
unsigned Depth) const { | ||
Known.resetAll(); | ||
const MachineInstr *MI = MRI.getVRegDef(R); | ||
switch (MI->getOpcode()) { | ||
case AMDGPU::S_BFE_I32: | ||
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32, | ||
/*SExt=*/true, Depth); | ||
case AMDGPU::S_BFE_U32: | ||
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32, | ||
/*SExt=*/false, Depth); | ||
case AMDGPU::S_BFE_I64: | ||
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64, | ||
/*SExt=*/true, Depth); | ||
case AMDGPU::S_BFE_U64: | ||
return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64, | ||
/*SExt=*/false, Depth); | ||
case AMDGPU::G_INTRINSIC: | ||
case AMDGPU::G_INTRINSIC_CONVERGENT: { | ||
Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID(); | ||
|
253 changes: 253 additions & 0 deletions
253
llvm/test/CodeGen/AMDGPU/GlobalISel/known-bits-sbfe.mir
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,253 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 | ||
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -passes='print<gisel-value-tracking>' %s -filetype=null 2>&1 | FileCheck %s | ||
|
||
--- | ||
name: test_s_bfe_u32_constants | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_u32_constants | ||
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28 | ||
%cst:sgpr_32(s32) = G_CONSTANT i32 65535 | ||
%bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262156, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i32_constants | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_i32_constants | ||
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 | ||
%cst:sgpr_32(s32) = G_CONSTANT i32 65535 | ||
%bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262156, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_u64_constants | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_u64_constants | ||
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60 | ||
%cst:sgpr_64(s64) = G_CONSTANT i64 65535 | ||
%bfe:sgpr_64(s64) = S_BFE_U64 %cst, 262156, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i64_constants | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_i64_constants | ||
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64 | ||
%cst:sgpr_64(s64) = G_CONSTANT i64 65535 | ||
%bfe:sgpr_64(s64) = S_BFE_I64 %cst, 262156, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_u32_middle_bits_unknown | ||
body: | | ||
bb.0: | ||
; Extract [8:16) but the middle 4 bits are ???? | ||
liveins: $sgpr0 | ||
|
||
; CHECK-LABEL: name: @test_s_bfe_u32_middle_bits_unknown | ||
; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1 | ||
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16 | ||
; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18 | ||
; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18 | ||
; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000011????11 SignBits:24 | ||
%input:sgpr_32(s32) = COPY $sgpr0 | ||
%cst:sgpr_32(s32) = G_CONSTANT i32 50175 | ||
%mask:sgpr_32(s32) = G_CONSTANT i32 15360 | ||
%masked_input:sgpr_32(s32) = G_AND %input, %mask | ||
%merged:sgpr_32(s32) = G_OR %masked_input, %cst | ||
%bfe:sgpr_32(s32) = S_BFE_U32 %merged, 524296, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i32_middle_bits_unknown | ||
body: | | ||
bb.0: | ||
; Extract [8:16) but the middle 4 bits are ???? | ||
liveins: $sgpr0 | ||
|
||
; CHECK-LABEL: name: @test_s_bfe_i32_middle_bits_unknown | ||
; CHECK-NEXT: %input:sgpr_32 KnownBits:???????????????????????????????? SignBits:1 | ||
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001100001111111111 SignBits:16 | ||
; CHECK-NEXT: %mask:sgpr_32 KnownBits:00000000000000000011110000000000 SignBits:18 | ||
; CHECK-NEXT: %masked_input:sgpr_32 KnownBits:000000000000000000????0000000000 SignBits:18 | ||
; CHECK-NEXT: %merged:sgpr_32 KnownBits:000000000000000011????1111111111 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111????11 SignBits:26 | ||
%input:sgpr_32(s32) = COPY $sgpr0 | ||
%cst:sgpr_32(s32) = G_CONSTANT i32 50175 | ||
%mask:sgpr_32(s32) = G_CONSTANT i32 15360 | ||
%masked_input:sgpr_32(s32) = G_AND %input, %mask | ||
%merged:sgpr_32(s32) = G_OR %masked_input, %cst | ||
%bfe:sgpr_32(s32) = S_BFE_I32 %merged, 524296, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_u64_middle_bits_unknown | ||
body: | | ||
bb.0: | ||
; Extract [8:16) but the middle 4 bits are ???? | ||
liveins: $sgpr0_sgpr1 | ||
|
||
; CHECK-LABEL: name: @test_s_bfe_u64_middle_bits_unknown | ||
; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 | ||
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48 | ||
; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50 | ||
; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50 | ||
; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000011????11 SignBits:56 | ||
%input:sgpr_64(s64) = COPY $sgpr0_sgpr1 | ||
%cst:sgpr_64(s64) = G_CONSTANT i64 50175 | ||
%mask:sgpr_64(s64) = G_CONSTANT i64 15360 | ||
%masked_input:sgpr_64(s64) = G_AND %input, %mask | ||
%merged:sgpr_64(s64) = G_OR %masked_input, %cst | ||
%bfe:sgpr_64(s64) = S_BFE_U64 %merged, 524296, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i64_middle_bits_unknown | ||
body: | | ||
bb.0: | ||
; Extract [8:16) but the middle 4 bits are ???? | ||
liveins: $sgpr0_sgpr1 | ||
|
||
; CHECK-LABEL: name: @test_s_bfe_i64_middle_bits_unknown | ||
; CHECK-NEXT: %input:sgpr_64 KnownBits:???????????????????????????????????????????????????????????????? SignBits:1 | ||
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001100001111111111 SignBits:48 | ||
; CHECK-NEXT: %mask:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000011110000000000 SignBits:50 | ||
; CHECK-NEXT: %masked_input:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000000????0000000000 SignBits:50 | ||
; CHECK-NEXT: %merged:sgpr_64 KnownBits:00000000000000000000000000000000000000000000000011????1111111111 SignBits:48 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111????11 SignBits:58 | ||
%input:sgpr_64(s64) = COPY $sgpr0_sgpr1 | ||
%cst:sgpr_64(s64) = G_CONSTANT i64 50175 | ||
%mask:sgpr_64(s64) = G_CONSTANT i64 15360 | ||
%masked_input:sgpr_64(s64) = G_AND %input, %mask | ||
%merged:sgpr_64(s64) = G_OR %masked_input, %cst | ||
%bfe:sgpr_64(s64) = S_BFE_I64 %merged, 524296, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i32_g_constants | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_i32_g_constants | ||
; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 | ||
; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 | ||
%src0:sgpr_32(s32) = G_CONSTANT i32 65535 | ||
%src1:sgpr_32(s32) = G_CONSTANT i32 262156 | ||
%bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_u64_g_constants | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_u64_g_constants | ||
; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 | ||
; CHECK-NEXT: %src1:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60 | ||
%src0:sgpr_64(s64) = G_CONSTANT i64 65535 | ||
%src1:sgpr_32(s32) = G_CONSTANT i32 262156 | ||
%bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i32_g_constants_lookthrough | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; CHECK-LABEL: name: @test_s_bfe_i32_g_constants_lookthrough | ||
; CHECK-NEXT: %src0:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 | ||
; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5 | ||
; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 | ||
%src0:sgpr_32(s32) = G_CONSTANT i32 65535 | ||
%src1:sgpr_32(s24) = G_CONSTANT i24 262156 | ||
%src1_ext:sgpr_32(s32) = G_ZEXT %src1 | ||
%bfe:sgpr_32(s32) = S_BFE_I32 %src0, %src1_ext, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
---
name:            test_s_bfe_u64_g_constants_lookthrough
body: |
  bb.0:
    ; Extract [12:16)
    ; The control word reaches the BFE through a G_ZEXT, so the constant has
    ; to be found by looking through the extension.
    ; CHECK-LABEL: name: @test_s_bfe_u64_g_constants_lookthrough
    ; CHECK-NEXT: %src0:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48
    ; CHECK-NEXT: %src1:sgpr_32 KnownBits:000001000000000000001100 SignBits:5
    ; CHECK-NEXT: %src1_ext:sgpr_32 KnownBits:00000000000001000000000000001100 SignBits:13
    ; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000000000000000001111 SignBits:60
    %src0:sgpr_64(s64) = G_CONSTANT i64 65535
    %src1:sgpr_32(s24) = G_CONSTANT i24 262156
    %src1_ext:sgpr_32(s32) = G_ZEXT %src1
    %bfe:sgpr_64(s64) = S_BFE_U64 %src0, %src1_ext, implicit-def $scc
    $sgpr0_sgpr1 = COPY %bfe
...
--- | ||
name: test_s_bfe_u32_trash_bits | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; Check that the 6th bit is ignored for u32. The lower 6 bits are | ||
; 101100 but we should mask out the first 1 for the 32 bit version. | ||
; CHECK-LABEL: name: @test_s_bfe_u32_trash_bits | ||
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:00000000000000000000000000001111 SignBits:28 | ||
%cst:sgpr_32(s32) = G_CONSTANT i32 65535 | ||
%bfe:sgpr_32(s32) = S_BFE_U32 %cst, 262252, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i32_trash_bits | ||
body: | | ||
bb.0: | ||
; Extract [12:16) | ||
; Check that the 6th bit is ignored for i32. The lower 6 bits are | ||
; 101100 but we should mask out the first 1 for the 32 bit version. | ||
; CHECK-LABEL: name: @test_s_bfe_i32_trash_bits | ||
; CHECK-NEXT: %cst:sgpr_32 KnownBits:00000000000000001111111111111111 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_32 KnownBits:11111111111111111111111111111111 SignBits:32 | ||
%cst:sgpr_32(s32) = G_CONSTANT i32 65535 | ||
%bfe:sgpr_32(s32) = S_BFE_I32 %cst, 262252, implicit-def $scc | ||
$sgpr0 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_u64_constants_sixth_bit | ||
body: | | ||
bb.0: | ||
; Extract [32:48) | ||
; Check we correctly read 6 bits for the offset on 64 bit BFEs. | ||
; CHECK-LABEL: name: @test_s_bfe_u64_constants_sixth_bit | ||
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:0000000000000000000000000000000000000000000000001111111111111111 SignBits:48 | ||
%cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360 | ||
%bfe:sgpr_64(s64) = S_BFE_U64 %cst, 1048608, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... | ||
--- | ||
name: test_s_bfe_i64_constants_sixth_bit | ||
body: | | ||
bb.0: | ||
; Extract [32:48) | ||
; Check we correctly read 6 bits for the offset on 64 bit BFEs. | ||
; CHECK-LABEL: name: @test_s_bfe_i64_constants_sixth_bit | ||
; CHECK-NEXT: %cst:sgpr_64 KnownBits:0000000000000000111111111111111100000000000000000000000000000000 SignBits:16 | ||
; CHECK-NEXT: %bfe:sgpr_64 KnownBits:1111111111111111111111111111111111111111111111111111111111111111 SignBits:64 | ||
%cst:sgpr_64(s64) = G_CONSTANT i64 281470681743360 | ||
%bfe:sgpr_64(s64) = S_BFE_I64 %cst, 1048608, implicit-def $scc | ||
$sgpr0_sgpr1 = COPY %bfe | ||
... |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Really I don't think we should be asserting anything. I suggest just using the same mask for Width that you do for Offset. Or test some hardware to see exactly what happens when Width >= 32. Or bail out but without an assertion.