-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Compute GISel KnownBits for S_BFE instructions #141588
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-testing-tools @llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh) ChangesNext patches in the stack will emit them in the RegBankCombiner. With this, S_BFE instructions will hopefully interfere less with optimizations. Full diff: https://github.com/llvm/llvm-project/pull/141588.diff 1 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ade88a16193b8..1a1490b55d3e4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16502,12 +16502,57 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST,
Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
}
+static void knownBitsForSBFE(const MachineInstr &MI, GISelValueTracking &VT,
+ KnownBits &Known, const APInt &DemandedElts,
+ unsigned BFEWidth, bool SExt) {
+ const MachineRegisterInfo &MRI = VT.getMachineFunction().getRegInfo();
+ const MachineOperand &Src1 = MI.getOperand(2);
+
+ unsigned Src1Cst = 0;
+ if (Src1.isImm())
+ Src1Cst = Src1.getImm();
+ else if (Src1.isReg()) {
+ auto Cst = getIConstantVRegValWithLookThrough(Src1.getReg(), MRI);
+ if (!Cst)
+ return;
+ Src1Cst = Cst->Value.getZExtValue();
+ } else
+ return;
+
+ const unsigned Mask = maskTrailingOnes<unsigned>(6);
+ const unsigned Offset = Src1Cst & Mask;
+ const unsigned Width = (Src1Cst >> 16) & Mask;
+
+ VT.computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts);
+
+ const uint64_t WidthMask = maskTrailingOnes<uint64_t>(Width);
+ Known.Zero = Known.Zero.shl(Offset) & WidthMask;
+ Known.One = Known.One.shl(Offset) & WidthMask;
+
+ if (SExt)
+ Known.sextInReg(Width);
+ else
+ Known.Zero |= maskLeadingOnes<unsigned>(BFEWidth - Width);
+}
+
void SITargetLowering::computeKnownBitsForTargetInstr(
GISelValueTracking &VT, Register R, KnownBits &Known,
const APInt &DemandedElts, const MachineRegisterInfo &MRI,
unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
+ case AMDGPU::S_BFE_I32:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+ /*SExt=*/true);
+ case AMDGPU::S_BFE_U32:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/32,
+ /*SExt=*/false);
+ case AMDGPU::S_BFE_I64:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+ /*SExt=*/true);
+ case AMDGPU::S_BFE_U64:
+ return knownBitsForSBFE(*MI, VT, Known, DemandedElts, /*Width=*/64,
+ /*SExt=*/false);
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_CONVERGENT: {
Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();
|
I'm looking at adding tests for this right now using the new script |
a1992f6
to
7a7b474
Compare
Known.Zero = Known.Zero.lshr(Offset); | ||
Known.One = Known.One.lshr(Offset); | ||
|
||
Known = Known.trunc(Width); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similarly this could assert if the value in src1 encodes a width > 32.
Next patches in the stack will emit them in the RegBankCombiner. With this, S_BFE instructions will hopefully interfere less with optimizations.