Skip to content

Commit 2429a5c

Browse files
bcahoonDavid Salinas
authored and
David Salinas
committed
Revert "Reapply "AMDGPU: Move attributor into optimization pipeline (llvm#83131)" and follow up commit "clang/AMDGPU: Defeat attribute optimization in attribute test" (llvm#98851)"
This reverts commit b1bcb7c. Change-Id: Ia262230003989ed152f82ea475364b42d2592090
1 parent d6afa3a commit 2429a5c

File tree

566 files changed

+84701
-79354
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

566 files changed

+84701
-79354
lines changed

clang/test/CodeGenHIP/default-attributes.hip

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,44 +2,55 @@
22
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
33
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPTNONE %s
44

5+
// RUN: %clang_cc1 -O3 -triple amdgcn-amd-amdhsa -x hip -fno-ident -fcuda-is-device \
6+
// RUN: -emit-llvm -o - %s | FileCheck -check-prefix=OPT %s
7+
58
#define __device__ __attribute__((device))
69
#define __global__ __attribute__((global))
710

8-
//.
9-
// OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
10-
// OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
11-
// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
12-
//.
13-
__device__ void extern_func();
14-
1511
// OPTNONE: Function Attrs: convergent mustprogress noinline nounwind optnone
1612
// OPTNONE-LABEL: define {{[^@]+}}@_Z4funcv
1713
// OPTNONE-SAME: () #[[ATTR0:[0-9]+]] {
1814
// OPTNONE-NEXT: entry:
19-
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3:[0-9]+]]
2015
// OPTNONE-NEXT: ret void
2116
//
17+
// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
18+
// OPT-LABEL: define {{[^@]+}}@_Z4funcv
19+
// OPT-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] {
20+
// OPT-NEXT: entry:
21+
// OPT-NEXT: ret void
22+
//
2223
__device__ void func() {
23-
extern_func();
24+
2425
}
2526

2627
// OPTNONE: Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
2728
// OPTNONE-LABEL: define {{[^@]+}}@_Z6kernelv
28-
// OPTNONE-SAME: () #[[ATTR2:[0-9]+]] {
29+
// OPTNONE-SAME: () #[[ATTR1:[0-9]+]] {
2930
// OPTNONE-NEXT: entry:
30-
// OPTNONE-NEXT: call void @_Z11extern_funcv() #[[ATTR3]]
3131
// OPTNONE-NEXT: ret void
3232
//
33+
// OPT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
34+
// OPT-LABEL: define {{[^@]+}}@_Z6kernelv
35+
// OPT-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
36+
// OPT-NEXT: entry:
37+
// OPT-NEXT: ret void
38+
//
3339
__global__ void kernel() {
34-
extern_func();
40+
3541
}
3642
//.
37-
// OPTNONE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
38-
// OPTNONE: attributes #[[ATTR1:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
39-
// OPTNONE: attributes #[[ATTR2]] = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
40-
// OPTNONE: attributes #[[ATTR3]] = { convergent nounwind }
43+
// OPTNONE: attributes #0 = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
44+
// OPTNONE: attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
45+
//.
46+
// OPT: attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
47+
// OPT: attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) "amdgpu-flat-work-group-size"="1,1024" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" }
48+
//.
49+
// OPTNONE: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
50+
// OPTNONE: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
51+
// OPTNONE: !2 = !{i32 1, !"wchar_size", i32 4}
4152
//.
42-
// OPTNONE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
43-
// OPTNONE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
44-
// OPTNONE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
53+
// OPT: !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
54+
// OPT: !1 = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
55+
// OPT: !2 = !{i32 1, !"wchar_size", i32 4}
4556
//.

llvm/docs/ReleaseNotes.rst

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,6 @@ Changes to the AMDGPU Backend
135135
:ref:`atomicrmw <i_atomicrmw>` instruction with `fadd`, `fmin` and
136136
`fmax` with addrspace(3) instead.
137137

138-
* AMDGPUAttributor is no longer run as part of the codegen pass
139-
pipeline. It is expected to run as part of the middle end
140-
optimizations.
141-
142138
Changes to the ARM Backend
143139
--------------------------
144140

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -796,14 +796,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
796796
PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
797797
});
798798

799-
// FIXME: Why is AMDGPUAttributor not in CGSCC?
800-
PB.registerOptimizerLastEPCallback(
801-
[this](ModulePassManager &MPM, OptimizationLevel Level) {
802-
if (Level != OptimizationLevel::O0) {
803-
MPM.addPass(AMDGPUAttributorPass(*this));
804-
}
805-
});
806-
807799
PB.registerFullLinkTimeOptimizationLastEPCallback(
808800
[this](ModulePassManager &PM, OptimizationLevel Level) {
809801
// We want to support the -lto-partitions=N option as "best effort".
@@ -1144,6 +1136,11 @@ void AMDGPUPassConfig::addIRPasses() {
11441136
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
11451137
}
11461138

1139+
// AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
1140+
// after their introduction
1141+
if (TM.getOptLevel() > CodeGenOptLevel::None)
1142+
addPass(createAMDGPUAttributorLegacyPass());
1143+
11471144
if (TM.getOptLevel() > CodeGenOptLevel::None)
11481145
addPass(createInferAddressSpacesPass());
11491146

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -865,12 +865,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
865865
break;
866866
}
867867
}
868-
869-
// FIXME: We can spill incoming arguments and restore at the end of the
870-
// prolog.
871-
if (!ScratchWaveOffsetReg)
872-
report_fatal_error(
873-
"could not find temporary scratch offset register in prolog");
874868
} else {
875869
ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
876870
}

llvm/test/CodeGen/AMDGPU/GlobalISel/addsubu64.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
66
; GFX11-LABEL: s_add_u64:
77
; GFX11: ; %bb.0: ; %entry
88
; GFX11-NEXT: s_clause 0x1
9-
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
10-
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
9+
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
10+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
1111
; GFX11-NEXT: v_mov_b32_e32 v2, 0
1212
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1313
; GFX11-NEXT: s_add_u32 s0, s6, s0
@@ -22,8 +22,8 @@ define amdgpu_kernel void @s_add_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
2222
; GFX12-LABEL: s_add_u64:
2323
; GFX12: ; %bb.0: ; %entry
2424
; GFX12-NEXT: s_clause 0x1
25-
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
26-
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
25+
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
26+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
2727
; GFX12-NEXT: v_mov_b32_e32 v2, 0
2828
; GFX12-NEXT: s_wait_kmcnt 0x0
2929
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[6:7], s[0:1]
@@ -58,8 +58,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
5858
; GFX11-LABEL: s_sub_u64:
5959
; GFX11: ; %bb.0: ; %entry
6060
; GFX11-NEXT: s_clause 0x1
61-
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
62-
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
61+
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
62+
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
6363
; GFX11-NEXT: v_mov_b32_e32 v2, 0
6464
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
6565
; GFX11-NEXT: s_sub_u32 s0, s6, s0
@@ -74,8 +74,8 @@ define amdgpu_kernel void @s_sub_u64(ptr addrspace(1) %out, i64 %a, i64 %b) {
7474
; GFX12-LABEL: s_sub_u64:
7575
; GFX12: ; %bb.0: ; %entry
7676
; GFX12-NEXT: s_clause 0x1
77-
; GFX12-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
78-
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
77+
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
78+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
7979
; GFX12-NEXT: v_mov_b32_e32 v2, 0
8080
; GFX12-NEXT: s_wait_kmcnt 0x0
8181
; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[6:7], s[0:1]

0 commit comments

Comments
 (0)