Skip to content

Commit 167427f

Browse files
authored
[AMDGPU] change order of fp and sp in kernel prologue (#90626)
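Change the order in which the frame pointer (FP) and stack pointer (SP) are initialized in the kernel prologue, so that FP is now set up before SP, and update the related codegen tests accordingly. This makes it easier to merge code into our downstream branches. Signed-off-by: gangc <gangc@amd.com>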
1 parent 9226688 commit 167427f

File tree

5 files changed

+19
-19
lines changed

5 files changed

+19
-19
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -683,19 +683,19 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
683683
}
684684
assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
685685

686+
if (hasFP(MF)) {
687+
Register FPReg = MFI->getFrameOffsetReg();
688+
assert(FPReg != AMDGPU::FP_REG);
689+
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
690+
}
691+
686692
if (requiresStackPointerReference(MF)) {
687693
Register SPReg = MFI->getStackPtrOffsetReg();
688694
assert(SPReg != AMDGPU::SP_REG);
689695
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
690696
.addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
691697
}
692698

693-
if (hasFP(MF)) {
694-
Register FPReg = MFI->getFrameOffsetReg();
695-
assert(FPReg != AMDGPU::FP_REG);
696-
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
697-
}
698-
699699
bool NeedsFlatScratchInit =
700700
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
701701
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||

llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
1616
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
1717
; GCN-NEXT: s_add_u32 s0, s0, s9
1818
; GCN-NEXT: s_addc_u32 s1, s1, 0
19-
; GCN-NEXT: s_movk_i32 s32, 0x400
2019
; GCN-NEXT: s_mov_b32 s33, 0
20+
; GCN-NEXT: s_movk_i32 s32, 0x400
2121
; GCN-NEXT: s_waitcnt lgkmcnt(0)
2222
; GCN-NEXT: s_cmp_lg_u32 s6, 0
2323
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
@@ -87,8 +87,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
8787
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
8888
; GCN-NEXT: s_add_u32 s0, s0, s9
8989
; GCN-NEXT: s_addc_u32 s1, s1, 0
90-
; GCN-NEXT: s_movk_i32 s32, 0x1000
9190
; GCN-NEXT: s_mov_b32 s33, 0
91+
; GCN-NEXT: s_movk_i32 s32, 0x1000
9292
; GCN-NEXT: s_waitcnt lgkmcnt(0)
9393
; GCN-NEXT: s_cmp_lg_u32 s6, 0
9494
; GCN-NEXT: s_cbranch_scc1 .LBB1_2

llvm/test/CodeGen/AMDGPU/cc-update.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
321321
; GFX803-NEXT: s_mov_b64 s[10:11], s[8:9]
322322
; GFX803-NEXT: v_or_b32_e32 v31, v0, v2
323323
; GFX803-NEXT: s_mov_b64 s[8:9], s[6:7]
324-
; GFX803-NEXT: s_mov_b32 s32, 0
325324
; GFX803-NEXT: s_mov_b32 s33, 0
325+
; GFX803-NEXT: s_mov_b32 s32, 0
326326
; GFX803-NEXT: s_getpc_b64 s[16:17]
327327
; GFX803-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
328328
; GFX803-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
@@ -340,8 +340,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
340340
; GFX900-NEXT: s_mov_b64 s[10:11], s[8:9]
341341
; GFX900-NEXT: v_or3_b32 v31, v0, v1, v2
342342
; GFX900-NEXT: s_mov_b64 s[8:9], s[6:7]
343-
; GFX900-NEXT: s_mov_b32 s32, 0
344343
; GFX900-NEXT: s_mov_b32 s33, 0
344+
; GFX900-NEXT: s_mov_b32 s32, 0
345345
; GFX900-NEXT: s_getpc_b64 s[16:17]
346346
; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4
347347
; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12
@@ -351,8 +351,8 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
351351
; GFX1010-LABEL: test_force_fp_kern_call:
352352
; GFX1010: ; %bb.0: ; %entry
353353
; GFX1010-NEXT: s_add_u32 s10, s10, s15
354-
; GFX1010-NEXT: s_mov_b32 s32, 0
355354
; GFX1010-NEXT: s_mov_b32 s33, 0
355+
; GFX1010-NEXT: s_mov_b32 s32, 0
356356
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
357357
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
358358
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
@@ -378,16 +378,16 @@ define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
378378
; GFX1100-NEXT: s_mov_b64 s[8:9], s[2:3]
379379
; GFX1100-NEXT: s_mov_b32 s13, s14
380380
; GFX1100-NEXT: s_mov_b32 s14, s15
381-
; GFX1100-NEXT: s_mov_b32 s32, 0
382381
; GFX1100-NEXT: s_mov_b32 s33, 0
382+
; GFX1100-NEXT: s_mov_b32 s32, 0
383383
; GFX1100-NEXT: s_getpc_b64 s[6:7]
384384
; GFX1100-NEXT: s_add_u32 s6, s6, ex@rel32@lo+4
385385
; GFX1100-NEXT: s_addc_u32 s7, s7, ex@rel32@hi+12
386386
; GFX1100-NEXT: s_swappc_b64 s[30:31], s[6:7]
387387
; GFX1100-NEXT: s_endpgm
388388
; GFX1010-NEXT s_add_u32 s12, s12, s17
389-
; GFX1010-NEXT s_mov_b32 s32, 0
390389
; GFX1010-NEXT s_mov_b32 s33, 0
390+
; GFX1010-NEXT s_mov_b32 s32, 0
391391
; GFX1010-NEXT s_addc_u32 s13, s13, 0
392392
; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
393393
; GFX1010-NEXT s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
@@ -459,8 +459,8 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add
459459
; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
460460
; GFX1010: ; %bb.0: ; %entry
461461
; GFX1010-NEXT: s_add_u32 s10, s10, s15
462-
; GFX1010-NEXT: s_movk_i32 s32, 0x200
463462
; GFX1010-NEXT: s_mov_b32 s33, 0
463+
; GFX1010-NEXT: s_movk_i32 s32, 0x200
464464
; GFX1010-NEXT: s_addc_u32 s11, s11, 0
465465
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
466466
; GFX1010-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11

llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
define amdgpu_kernel void @test_kernel(i32 %val) #0 {
88
; CHECK-LABEL: test_kernel:
99
; CHECK: ; %bb.0:
10-
; CHECK-NEXT: s_mov_b32 s32, 0x180000
1110
; CHECK-NEXT: s_mov_b32 s33, 0
11+
; CHECK-NEXT: s_mov_b32 s32, 0x180000
1212
; CHECK-NEXT: s_add_u32 flat_scratch_lo, s10, s15
1313
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s11, 0
1414
; CHECK-NEXT: s_add_u32 s0, s0, s15

llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
2121
; MUBUF-NEXT: s_add_u32 s0, s0, s9
2222
; MUBUF-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x8
2323
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
24-
; MUBUF-NEXT: s_movk_i32 s32, 0x400
2524
; MUBUF-NEXT: s_mov_b32 s33, 0
25+
; MUBUF-NEXT: s_movk_i32 s32, 0x400
2626
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
2727
; MUBUF-NEXT: s_cmp_lg_u32 s8, 0
2828
; MUBUF-NEXT: s_cbranch_scc1 .LBB0_3
@@ -57,8 +57,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
5757
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
5858
; FLATSCR-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x8
5959
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
60-
; FLATSCR-NEXT: s_mov_b32 s32, 16
6160
; FLATSCR-NEXT: s_mov_b32 s33, 0
61+
; FLATSCR-NEXT: s_mov_b32 s32, 16
6262
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
6363
; FLATSCR-NEXT: s_cmp_lg_u32 s4, 0
6464
; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_3
@@ -125,8 +125,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
125125
; MUBUF-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x8
126126
; MUBUF-NEXT: s_add_u32 s0, s0, s9
127127
; MUBUF-NEXT: s_addc_u32 s1, s1, 0
128-
; MUBUF-NEXT: s_movk_i32 s32, 0x1000
129128
; MUBUF-NEXT: s_mov_b32 s33, 0
129+
; MUBUF-NEXT: s_movk_i32 s32, 0x1000
130130
; MUBUF-NEXT: s_waitcnt lgkmcnt(0)
131131
; MUBUF-NEXT: s_cmp_lg_u32 s6, 0
132132
; MUBUF-NEXT: s_cbranch_scc1 .LBB1_2
@@ -159,8 +159,8 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
159159
; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5
160160
; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
161161
; FLATSCR-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x8
162-
; FLATSCR-NEXT: s_mov_b32 s32, 64
163162
; FLATSCR-NEXT: s_mov_b32 s33, 0
163+
; FLATSCR-NEXT: s_mov_b32 s32, 64
164164
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
165165
; FLATSCR-NEXT: s_cmp_lg_u32 s2, 0
166166
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_2

0 commit comments

Comments
 (0)