Skip to content

Commit e160c35

Browse files
authored
Reapply "RegAlloc: Fix verifier error after failed allocation (#119690)" (#128400)
Reapply "RegAlloc: Fix verifier error after failed allocation (#119690)". This reverts commit 0c50054. Reapply with more fixes to avoid expensive_checks failures. Make sure to call splitSeparateComponents after shrinkToUses, and update the VirtRegMap with the split registers. Also set undef on all physical register aliases of the assigned register. Move physreg handling (not sure if necessary). Remove intervals from regunits (not sure if necessary).
1 parent 92d8222 commit e160c35

10 files changed

+166
-13
lines changed

llvm/lib/CodeGen/RegAllocBase.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis,
6565
Matrix = &mat;
6666
MRI->freezeReservedRegs();
6767
RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
68+
FailedVRegs.clear();
6869
}
6970

7071
// Visit all the live registers. If they are already assigned to a physical
@@ -128,6 +129,7 @@ void RegAllocBase::allocatePhysRegs() {
128129

129130
// Keep going after reporting the error.
130131
VRM->assignVirt2Phys(VirtReg->reg(), AvailablePhysReg);
132+
FailedVRegs.insert(VirtReg->reg());
131133
} else if (AvailablePhysReg)
132134
Matrix->assign(*VirtReg, AvailablePhysReg);
133135

@@ -161,6 +163,60 @@ void RegAllocBase::postOptimization() {
161163
DeadRemats.clear();
162164
}
163165

166+
// Repairs state for every virtual register recorded in FailedVRegs.
//
// Phase 1 gathers the "junk" set: each failed vreg plus every vreg the
// LiveRegMatrix query reports as interfering with it on a regunit of the
// physreg VRM currently maps it to. Phase 2 walks that set, marks every
// reading operand of the vreg (and of every alias of its physreg) undef,
// shrinks the vreg's live interval to its uses, and registers any intervals
// split off by that shrink in VRM under the same original vreg and physreg.
void RegAllocBase::cleanupFailedVRegs() {
167+
// Vregs whose liveness is rebuilt in the second loop below.
SmallSet<Register, 8> JunkRegs;
168+
169+
for (Register FailedReg : FailedVRegs) {
170+
JunkRegs.insert(FailedReg);
171+
172+
MCRegister PhysReg = VRM->getPhys(FailedReg);
173+
LiveInterval &FailedInterval = LIS->getInterval(FailedReg);
174+
175+
// The liveness information for the failed register and anything interfering
176+
// with the physical register we arbitrarily chose is junk and needs to be
177+
// deleted.
178+
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
179+
LiveIntervalUnion::Query &Q = Matrix->query(FailedInterval, *Units);
180+
for (const LiveInterval *InterferingReg : Q.interferingVRegs())
181+
JunkRegs.insert(InterferingReg->reg());
182+
LIS->removeRegUnit(*Units);
183+
}
184+
}
185+
186+
for (Register JunkReg : JunkRegs) {
187+
MCRegister PhysReg = VRM->getPhys(JunkReg);
188+
// We still should produce valid IR. Kill all the uses and reduce the live
189+
// ranges so that we don't think it's possible to introduce kill flags
190+
// later which will fail the verifier.
191+
for (MachineOperand &MO : MRI->reg_operands(JunkReg)) {
192+
if (MO.readsReg())
193+
MO.setIsUndef(true);
194+
}
195+
196+
// The liveness of the assigned physical register is also now unreliable.
197+
for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid();
198+
++Aliases) {
199+
for (MachineOperand &MO : MRI->reg_operands(*Aliases)) {
200+
if (MO.readsReg())
201+
MO.setIsUndef(true);
202+
}
203+
}
204+
205+
LiveInterval &JunkLI = LIS->getInterval(JunkReg);
// Only when shrinkToUses() returns true is the interval split into its
// separate components; each resulting interval is then recorded in VRM.
206+
if (LIS->shrinkToUses(&JunkLI)) {
207+
SmallVector<LiveInterval *, 8> SplitLIs;
208+
LIS->splitSeparateComponents(JunkLI, SplitLIs);
209+
210+
// NOTE(review): presumably grow() sizes VRM's tables for the registers
// created by the split before they are indexed below -- confirm.
VRM->grow();
211+
Register Original = VRM->getOriginal(JunkReg);
212+
for (LiveInterval *SplitLI : SplitLIs) {
213+
// Each split interval inherits JunkReg's original vreg and its physreg.
VRM->setIsSplitFromReg(SplitLI->reg(), Original);
214+
VRM->assignVirt2Phys(SplitLI->reg(), PhysReg);
215+
}
216+
}
217+
}
218+
}
219+
164220
void RegAllocBase::enqueue(const LiveInterval *LI) {
165221
const Register Reg = LI->reg();
166222

llvm/lib/CodeGen/RegAllocBase.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#define LLVM_LIB_CODEGEN_REGALLOCBASE_H
3838

3939
#include "llvm/ADT/SmallPtrSet.h"
40+
#include "llvm/ADT/SmallSet.h"
4041
#include "llvm/CodeGen/MachineRegisterInfo.h"
4142
#include "llvm/CodeGen/RegAllocCommon.h"
4243
#include "llvm/CodeGen/RegisterClassInfo.h"
@@ -81,6 +82,7 @@ class RegAllocBase {
8182
/// always available for the remat of all the siblings of the original reg.
8283
SmallPtrSet<MachineInstr *, 32> DeadRemats;
8384

85+
SmallSet<Register, 2> FailedVRegs;
8486
RegAllocBase(const RegAllocFilterFunc F = nullptr)
8587
: shouldAllocateRegisterImpl(F) {}
8688

@@ -104,6 +106,10 @@ class RegAllocBase {
104106
// rematerialization.
105107
virtual void postOptimization();
106108

109+
/// Perform cleanups on registers that failed to allocate. This hacks on the
110+
/// liveness in order to avoid spurious verifier errors in later passes.
111+
void cleanupFailedVRegs();
112+
107113
// Get a temporary reference to a Spiller instance.
108114
virtual Spiller &spiller() = 0;
109115

llvm/lib/CodeGen/RegAllocBasic.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
329329

330330
allocatePhysRegs();
331331
postOptimization();
332+
cleanupFailedVRegs();
332333

333334
// Diagnostic output before rewriting
334335
LLVM_DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");

llvm/lib/CodeGen/RegAllocGreedy.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,6 +2927,7 @@ bool RAGreedy::run(MachineFunction &mf) {
29272927
if (VerifyEnabled)
29282928
MF->verify(LIS, Indexes, "Before post optimization", &errs());
29292929
postOptimization();
2930+
cleanupFailedVRegs();
29302931
reportStats();
29312932

29322933
releaseMemory();

llvm/test/CodeGen/AMDGPU/illegal-eviction-assert.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@
1313
ret void
1414
}
1515

16-
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
16+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
1717

1818
...
1919

20-
# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
21-
# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit killed undef $vgpr0_vgpr1_vgpr2_vgpr3
20+
# CHECK: S_NOP 0, implicit-def $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3, implicit-def $vgpr28_vgpr29_vgpr30_vgpr31, implicit-def dead $vgpr0_vgpr1_vgpr2_vgpr3
21+
# CHECK: S_NOP 0, implicit killed undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit killed undef $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3, implicit killed undef $vgpr28_vgpr29_vgpr30_vgpr31, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
2222

2323
---
2424
name: foo

llvm/test/CodeGen/AMDGPU/inflated-reg-class-snippet-copy-use-after-free.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free
2828
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
2929
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
30-
# CHECK-NEXT: [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
31-
# CHECK-NEXT: early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
32-
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[MFMA0]].sub2_sub3 {
33-
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[MFMA0]].sub0
30+
# CHECK-NEXT: dead [[RESTORE0:%[0-9]+]]:vreg_512_align2 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
31+
# CHECK-NEXT: dead early-clobber [[MFMA0:%[0-9]+]]:vreg_512_align2 = V_MFMA_F32_16X16X1F32_vgprcd_e64 undef %3:vgpr_32, undef %3:vgpr_32, undef [[RESTORE0]], 0, 0, 0, implicit $mode, implicit $exec, implicit $mode, implicit $exec
32+
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[MFMA0]].sub2_sub3 {
33+
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[MFMA0]].sub0
3434
# CHECK-NEXT: }
3535
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
3636
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0
@@ -118,10 +118,10 @@ body: |
118118
# CHECK-LABEL: name: inflated_reg_class_copy_use_after_free_lane_subset
119119
# CHECK: S_NOP 0, implicit-def [[ORIG_REG:%[0-9]+]].sub0_sub1_sub2_sub3
120120
# CHECK-NEXT: SI_SPILL_AV512_SAVE [[ORIG_REG]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
121-
# CHECK-NEXT: [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
122-
# CHECK-NEXT: S_NOP 0, implicit-def early-clobber [[REG1:%[0-9]+]], implicit [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit [[RESTORE_0]].sub4_sub5_sub6_sub7
123-
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[REG1]].sub2_sub3 {
124-
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY [[REG1]].sub0
121+
# CHECK-NEXT: dead [[RESTORE_0:%[0-9]+]]:av_512_align2 = SI_SPILL_AV512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
122+
# CHECK-NEXT: S_NOP 0, implicit-def dead early-clobber [[REG1:%[0-9]+]], implicit undef [[RESTORE_0]].sub0_sub1_sub2_sub3, implicit undef [[RESTORE_0]].sub4_sub5_sub6_sub7
123+
# CHECK-NEXT: undef [[SPLIT0:%[0-9]+]].sub2_sub3:av_512_align2 = COPY undef [[REG1]].sub2_sub3 {
124+
# CHECK-NEXT: internal [[SPLIT0]].sub0:av_512_align2 = COPY undef [[REG1]].sub0
125125
# CHECK-NEXT: }
126126
# CHECK-NEXT: undef [[SPLIT1:%[0-9]+]].sub2_sub3:av_512_align2 = COPY [[SPLIT0]].sub2_sub3 {
127127
# CHECK-NEXT: internal [[SPLIT1]].sub0:av_512_align2 = COPY [[SPLIT0]].sub0

llvm/test/CodeGen/AMDGPU/issue48473.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
# %25 to $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
4444

4545
# CHECK-LABEL: name: issue48473
46-
# CHECK: S_NOP 0, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit killed undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
46+
# CHECK: S_NOP 0, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr12_sgpr13_sgpr14_sgpr15, implicit killed undef $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23, implicit killed undef $sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit killed undef $sgpr84_sgpr85_sgpr86_sgpr87, implicit killed undef $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, implicit killed undef $sgpr4_sgpr5_sgpr6_sgpr7, implicit killed undef $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, implicit killed undef $sgpr88_sgpr89_sgpr90_sgpr91, implicit killed undef $sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83, implicit undef $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed undef $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59, implicit killed undef $sgpr92_sgpr93_sgpr94_sgpr95, implicit killed undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit undef $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit killed undef $sgpr96_sgpr97_sgpr98_sgpr99, implicit killed undef $sgpr8_sgpr9_sgpr10_sgpr11, implicit killed undef $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67
4747

4848
---
4949
name: issue48473
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -vgpr-regalloc=basic -sgpr-regalloc=basic -start-before=regallocbasic,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.basic.err | FileCheck -check-prefix=BASIC %s
2+
# RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-regalloc -o - %s 2> %t.greedy.err | FileCheck -check-prefix=GREEDY %s
3+
4+
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.basic.err
5+
# RUN: FileCheck -check-prefix=ERR -implicit-check-not=error %s < %t.greedy.err
6+
7+
# This testcase must fail register allocation. It should also not
8+
# produce a verifier error after doing so. Previously, it would not
9+
# properly update the liveness for the dummy selected register. As a
10+
# result, VirtRegRewriter would incorrectly add kill flags which
11+
# combined with other uses of the physical register produced a
12+
# verifier error.
13+
14+
# ERR: error: <unknown>:0:0: ran out of registers during register allocation
15+
16+
# GREEDY: SI_SPILL_V256_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
17+
# GREEDY-NEXT: SI_SPILL_V512_SAVE undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19
18+
# GREEDY-NEXT: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
19+
20+
# GREEDY: dead $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = SI_SPILL_V512_RESTORE
21+
# GREEDY: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE
22+
# GREEDY: S_NOP 0, implicit undef $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
23+
# GREEDY: S_NOP 0, implicit killed undef $vgpr20_vgpr21
24+
25+
26+
# BASIC: SI_SPILL_V128_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3
27+
# BASIC: SI_SPILL_V256_SAVE killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
28+
# BASIC: SI_SPILL_V512_SAVE undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
29+
# BASIC: SI_SPILL_V64_SAVE killed undef $vgpr0_vgpr1, %stack.{{[0-9]+}}, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.{{[0-9]+}}, align 4, addrspace 5)
30+
# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE
31+
# BASIC: $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = SI_SPILL_V256_RESTORE
32+
# BASIC: dead $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE
33+
# BASIC: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit killed undef $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3
34+
# BASIC: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE
35+
36+
--- |
37+
define void @killed_reg_after_regalloc_failure() #0 {
38+
ret void
39+
}
40+
41+
attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
42+
43+
...
44+
---
45+
name: killed_reg_after_regalloc_failure
46+
tracksRegLiveness: true
47+
machineFunctionInfo:
48+
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
49+
frameOffsetReg: '$sgpr33'
50+
stackPtrOffsetReg: '$sgpr32'
51+
body: |
52+
bb.0:
53+
S_NOP 0, implicit-def %0:vreg_512, implicit-def %1:vreg_256, implicit-def %2:vreg_128
54+
S_NOP 0, implicit-def %3:vreg_64
55+
S_NOP 0, implicit %0, implicit %1, implicit %2
56+
S_NOP 0, implicit %3
57+
S_ENDPGM 0
58+
59+
...
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: not llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR -implicit-check-not=error %s
2+
3+
; ERR: error: inline assembly requires more registers than available
4+
; ERR-NOT: ERROR
5+
; ERR-NOT: Bad machine code
6+
7+
; This test requires respecting undef on the spill source operand when
8+
; expanding the pseudos to avoid all verifier errors
9+
10+
%asm.output = type { <16 x i32>, <8 x i32>, <4 x i32>, <3 x i32>, <3 x i32> }
11+
12+
define void @foo(<32 x i32> addrspace(1)* %arg) #0 {
13+
%agpr0 = call i32 asm sideeffect "; def $0","=${a0}"()
14+
%asm = call %asm.output asm sideeffect "; def $0 $1 $2 $3 $4","=v,=v,=v,=v,=v"()
15+
%vgpr0 = extractvalue %asm.output %asm, 0
16+
%vgpr1 = extractvalue %asm.output %asm, 1
17+
%vgpr2 = extractvalue %asm.output %asm, 2
18+
%vgpr3 = extractvalue %asm.output %asm, 3
19+
%vgpr4 = extractvalue %asm.output %asm, 4
20+
call void asm sideeffect "; clobber", "~{a[0:31]},~{v[0:31]}"()
21+
call void asm sideeffect "; use $0","v"(<16 x i32> %vgpr0)
22+
call void asm sideeffect "; use $0","v"(<8 x i32> %vgpr1)
23+
call void asm sideeffect "; use $0","v"(<4 x i32> %vgpr2)
24+
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr3)
25+
call void asm sideeffect "; use $0","v"(<3 x i32> %vgpr4)
26+
call void asm sideeffect "; use $0","{a1}"(i32 %agpr0)
27+
ret void
28+
}
29+
30+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }

llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s 2>&1 | FileCheck %s
1+
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s
22

33
; This testcase fails register allocation at the same time it performs
44
; virtual register splitting (by introducing VGPR to AGPR copies). We

0 commit comments

Comments
 (0)