Skip to content

Commit 737c4a2

Browse files
committed
[clang][openmp][NFC] Remove arch-specific CGOpenMPRuntimeGPU files
The existing CGOpenMPRuntimeAMDGCN and CGOpenMPRuntimeNVPTX classes are just code bloat. By removing them, the codebase gets a bit cleaner.

Reviewed By: jdoerfert, JonChesterfield, tianshilei1992

Differential Revision: https://reviews.llvm.org/D113421
1 parent 2dd00c1 commit 737c4a2

19 files changed

+1413
-1569
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp

Lines changed: 0 additions & 48 deletions
This file was deleted.

clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h

Lines changed: 0 additions & 40 deletions
This file was deleted.

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "CGOpenMPRuntimeGPU.h"
15-
#include "CGOpenMPRuntimeNVPTX.h"
1615
#include "CodeGenFunction.h"
1716
#include "clang/AST/Attr.h"
1817
#include "clang/AST/DeclOpenMP.h"
@@ -21,7 +20,6 @@
2120
#include "clang/Basic/Cuda.h"
2221
#include "llvm/ADT/SmallPtrSet.h"
2322
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
24-
#include "llvm/IR/IntrinsicsNVPTX.h"
2523
#include "llvm/Support/MathExtras.h"
2624

2725
using namespace clang;
@@ -1197,7 +1195,7 @@ unsigned CGOpenMPRuntimeGPU::getDefaultLocationReserved2Flags() const {
11971195
CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
11981196
: CGOpenMPRuntime(CGM, "_", "$") {
11991197
if (!CGM.getLangOpts().OpenMPIsDevice)
1200-
llvm_unreachable("OpenMP NVPTX can only handle device code.");
1198+
llvm_unreachable("OpenMP can only handle device code.");
12011199

12021200
llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
12031201
if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
@@ -3960,3 +3958,18 @@ llvm::Value *CGOpenMPRuntimeGPU::getGPUNumThreads(CodeGenFunction &CGF) {
39603958
}
39613959
return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
39623960
}
3961+
3962+
/// Get the id of the current thread on the GPU by emitting a call to the
/// __kmpc_get_hardware_thread_id_in_block runtime function in \p CGF and
/// returning the value produced by that call.
llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
3963+
// The runtime function is called with an empty argument list.
ArrayRef<llvm::Value *> Args{};
3964+
return CGF.EmitRuntimeCall(
3965+
// Look up (or declare) the runtime function in the current module.
OMPBuilder.getOrCreateRuntimeFunction(
3966+
CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block),
3967+
Args);
3968+
}
3969+
3970+
/// Get the GPU warp size by emitting a call to the __kmpc_get_warp_size
/// runtime function in \p CGF and returning the value produced by that call.
llvm::Value *CGOpenMPRuntimeGPU::getGPUWarpSize(CodeGenFunction &CGF) {
3971+
// The runtime function is called with an empty argument list.
ArrayRef<llvm::Value *> Args{};
3972+
// Look up (or declare) the runtime function in the current module, then
// emit the call to it.
return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3973+
CGM.getModule(), OMPRTL___kmpc_get_warp_size),
3974+
Args);
3975+
}

clang/lib/CodeGen/CGOpenMPRuntimeGPU.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -176,10 +176,10 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
176176
/// and NVPTX.
177177

178178
/// Get the GPU warp size.
179-
virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0;
179+
llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
180180

181181
/// Get the id of the current thread on the GPU.
182-
virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0;
182+
llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
183183

184184
/// Get the maximum number of threads in a block of the GPU.
185185
llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);

clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp

Lines changed: 0 additions & 48 deletions
This file was deleted.

clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h

Lines changed: 0 additions & 40 deletions
This file was deleted.

clang/lib/CodeGen/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,9 +59,7 @@ add_clang_library(clangCodeGen
5959
CGObjCRuntime.cpp
6060
CGOpenCLRuntime.cpp
6161
CGOpenMPRuntime.cpp
62-
CGOpenMPRuntimeAMDGCN.cpp
6362
CGOpenMPRuntimeGPU.cpp
64-
CGOpenMPRuntimeNVPTX.cpp
6563
CGRecordLayoutBuilder.cpp
6664
CGStmt.cpp
6765
CGStmtOpenMP.cpp

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 3 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,7 @@
1919
#include "CGObjCRuntime.h"
2020
#include "CGOpenCLRuntime.h"
2121
#include "CGOpenMPRuntime.h"
22-
#include "CGOpenMPRuntimeAMDGCN.h"
23-
#include "CGOpenMPRuntimeNVPTX.h"
22+
#include "CGOpenMPRuntimeGPU.h"
2423
#include "CodeGenFunction.h"
2524
#include "CodeGenPGO.h"
2625
#include "ConstantEmitter.h"
@@ -244,14 +243,10 @@ void CodeGenModule::createOpenMPRuntime() {
244243
switch (getTriple().getArch()) {
245244
case llvm::Triple::nvptx:
246245
case llvm::Triple::nvptx64:
247-
assert(getLangOpts().OpenMPIsDevice &&
248-
"OpenMP NVPTX is only prepared to deal with device code.");
249-
OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
250-
break;
251246
case llvm::Triple::amdgcn:
252247
assert(getLangOpts().OpenMPIsDevice &&
253-
"OpenMP AMDGCN is only prepared to deal with device code.");
254-
OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
248+
"OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
249+
OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
255250
break;
256251
default:
257252
if (LangOpts.OpenMPSimd)

clang/test/OpenMP/nvptx_parallel_codegen.cpp

Lines changed: 30 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1664,31 +1664,31 @@ int bar(int n){
16641664
// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8
16651665
// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8
16661666
// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
1667-
// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1667+
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
16681668
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
16691669
// CHECK1-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
16701670
// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]]
16711671
// CHECK1: omp.critical.loop:
1672-
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1673-
// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]]
1674-
// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
1672+
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1673+
// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]]
1674+
// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
16751675
// CHECK1: omp.critical.test:
1676-
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1677-
// CHECK1-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]]
1678-
// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
1676+
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1677+
// CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]]
1678+
// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
16791679
// CHECK1: omp.critical.body:
1680-
// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
1681-
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
1682-
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
1683-
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4
1684-
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
1680+
// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
1681+
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
1682+
// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var")
1683+
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4
1684+
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
16851685
// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4
1686-
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
1686+
// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var")
16871687
// CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]]
16881688
// CHECK1: omp.critical.sync:
16891689
// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
1690-
// CHECK1-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
1691-
// CHECK1-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
1690+
// CHECK1-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1
1691+
// CHECK1-NEXT: store i32 [[TMP10]], i32* [[CRITICAL_COUNTER]], align 4
16921692
// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]]
16931693
// CHECK1: omp.critical.exit:
16941694
// CHECK1-NEXT: ret void
@@ -1936,31 +1936,31 @@ int bar(int n){
19361936
// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4
19371937
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4
19381938
// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
1939-
// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1939+
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
19401940
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
19411941
// CHECK2-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4
19421942
// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]]
19431943
// CHECK2: omp.critical.loop:
1944-
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1945-
// CHECK2-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]]
1946-
// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
1944+
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1945+
// CHECK2-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]]
1946+
// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
19471947
// CHECK2: omp.critical.test:
1948-
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1949-
// CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]]
1950-
// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
1948+
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4
1949+
// CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]]
1950+
// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
19511951
// CHECK2: omp.critical.body:
1952-
// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1953-
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
1954-
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
1955-
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4
1956-
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
1952+
// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4
1953+
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
1954+
// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var")
1955+
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4
1956+
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
19571957
// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4
1958-
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var")
1958+
// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], [8 x i32]* @"_gomp_critical_user_$var")
19591959
// CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]]
19601960
// CHECK2: omp.critical.sync:
19611961
// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
1962-
// CHECK2-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1
1963-
// CHECK2-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4
1962+
// CHECK2-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1
1963+
// CHECK2-NEXT: store i32 [[TMP10]], i32* [[CRITICAL_COUNTER]], align 4
19641964
// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]]
19651965
// CHECK2: omp.critical.exit:
19661966
// CHECK2-NEXT: ret void

0 commit comments

Comments
 (0)