diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 412f0432e85cc..0eb55daf32089 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -33,6 +33,7 @@ class SIShrinkInstructions {
   const GCNSubtarget *ST;
   const SIInstrInfo *TII;
   const SIRegisterInfo *TRI;
+  bool IsPostRA;
 
   bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
   bool shouldShrinkTrue16(MachineInstr &MI) const;
@@ -417,7 +418,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
     return;
 
   // There is no advantage to doing this pre-RA.
-  if (!MF->getProperties().hasNoVRegs())
+  if (!IsPostRA)
     return;
 
   if (TII->hasAnyModifiersSet(MI))
@@ -837,6 +838,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
   ST = &MF.getSubtarget<GCNSubtarget>();
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
+  IsPostRA = MF.getProperties().hasNoVRegs();
 
   unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
 
@@ -857,9 +859,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
 
       // Test if we are after regalloc. We only want to do this after any
       // optimizations happen because this will confuse them.
-      // XXX - not exactly a check for post-regalloc run.
      MachineOperand &Src = MI.getOperand(1);
-      if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
+      if (Src.isImm() && IsPostRA) {
        int32_t ModImm;
        unsigned ModOpcode =
            canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
@@ -948,9 +949,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
        continue;
      }
 
-      if (TII->isMIMG(MI.getOpcode()) &&
-          ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
-          MF.getProperties().hasNoVRegs()) {
+      if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
+          ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
        shrinkMIMG(MI);
        continue;
      }
@@ -1061,7 +1061,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
      // fold an immediate into the shrunk instruction as a literal operand. In
      // GFX10 VOP3 instructions can take a literal operand anyway, so there is
      // no advantage to doing this.
-      if (ST->hasVOP3Literal() && !MF.getProperties().hasNoVRegs())
+      if (ST->hasVOP3Literal() && !IsPostRA)
        continue;
 
      if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
diff --git a/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll b/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
index 2e75b90c00968..ce44c372ac3a7 100644
--- a/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
@@ -108,8 +108,8 @@ define { <2 x float>, <2 x i32> } @frexp_zero_negzero_vector() {
 ; CHECK-LABEL: frexp_zero_negzero_vector:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_bfrev_b32_e32 v1, 1
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_bfrev_b32_e32 v1, 1
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v3, 0
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]