Skip to content

Commit 2a47a84

Browse files
[openmp][nfc] Refactor GridValues
Remove redundant fields and replace pointer with virtual function Of fourteen fields, three are dead and four can be computed from the remainder. This leaves a couple of currently dead fields in place as they are expected to be used from the deviceRTL shortly. Two of the fields that can be computed are only used from codegen and require a log2() implementation so are inlined into codegen instead. This change leaves the new methods in the same location in the struct as the previous fields for convenience at review. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D108380
1 parent bdeda95 commit 2a47a84

File tree

7 files changed

+42
-60
lines changed

7 files changed

+42
-60
lines changed

clang/include/clang/Basic/TargetInfo.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
210210
unsigned char RegParmMax, SSERegParmMax;
211211
TargetCXXABI TheCXXABI;
212212
const LangASMap *AddrSpaceMap;
213-
const llvm::omp::GV *GridValues =
214-
nullptr; // target-specific GPU grid values that must be
215-
// consistent between host RTL (plugin), device RTL, and clang.
216213

217214
mutable StringRef PlatformName;
218215
mutable VersionTuple PlatformMinVersion;
@@ -1410,10 +1407,10 @@ class TargetInfo : public virtual TransferrableTargetInfo,
14101407
return LangAS::Default;
14111408
}
14121409

1413-
/// Return a target-specific GPU grid values
1414-
const llvm::omp::GV &getGridValue() const {
1415-
assert(GridValues != nullptr && "GridValues not initialized");
1416-
return *GridValues;
1410+
// access target-specific GPU grid values that must be consistent between
1411+
// host RTL (plugin), deviceRTL and clang.
1412+
virtual const llvm::omp::GV &getGridValue() const {
1413+
llvm_unreachable("getGridValue not implemented on this target");
14171414
}
14181415

14191416
/// Retrieve the name of the platform as it is used in the

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "clang/Basic/MacroBuilder.h"
1818
#include "clang/Basic/TargetBuiltins.h"
1919
#include "llvm/ADT/StringSwitch.h"
20-
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
2120

2221
using namespace clang;
2322
using namespace clang::targets;
@@ -335,7 +334,6 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
335334
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
336335
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
337336
: DataLayoutStringR600);
338-
GridValues = &llvm::omp::AMDGPUGridValues;
339337

340338
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
341339
!isAMDGCN(Triple));

clang/lib/Basic/Targets/AMDGPU.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
370370
return getLangASFromTargetAS(Constant);
371371
}
372372

373+
const llvm::omp::GV &getGridValue() const override {
374+
return llvm::omp::AMDGPUGridValues;
375+
}
376+
373377
/// \returns Target specific vtbl ptr address space.
374378
unsigned getVtblPtrAddressSpace() const override {
375379
return static_cast<unsigned>(Constant);

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
#include "clang/Basic/MacroBuilder.h"
1717
#include "clang/Basic/TargetBuiltins.h"
1818
#include "llvm/ADT/StringSwitch.h"
19-
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
2019

2120
using namespace clang;
2221
using namespace clang::targets;
@@ -65,7 +64,6 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
6564
TLSSupported = false;
6665
VLASupported = false;
6766
AddrSpaceMap = &NVPTXAddrSpaceMap;
68-
GridValues = &llvm::omp::NVPTXGridValues;
6967
UseAddrSpaceMapMangling = true;
7068

7169
// Define available target features

clang/lib/Basic/Targets/NVPTX.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
147147
Opts["cl_khr_local_int32_extended_atomics"] = true;
148148
}
149149

150+
const llvm::omp::GV &getGridValue() const override {
151+
return llvm::omp::NVPTXGridValues;
152+
}
153+
150154
/// \returns If a target requires an address within a target specific address
151155
/// space \p AddressSpace to be converted in order to be used, then return the
152156
/// corresponding target specific DWARF address space.

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/SmallPtrSet.h"
2323
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
2424
#include "llvm/IR/IntrinsicsNVPTX.h"
25+
#include "llvm/Support/MathExtras.h"
2526

2627
using namespace clang;
2728
using namespace CodeGen;
@@ -106,8 +107,7 @@ class ExecutionRuntimeModesRAII {
106107
/// is the same for all known NVPTX architectures.
107108
enum MachineConfiguration : unsigned {
108109
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
109-
/// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
110-
/// and GV_Warp_Size_Log2_Mask.
110+
/// specific Grid Values like GV_Warp_Size, GV_Slot_Size
111111

112112
/// Global memory alignment for performance.
113113
GlobalMemoryAlignment = 128,
@@ -535,7 +535,8 @@ class CheckVarsEscapingDeclContext final
535535
/// on the NVPTX device, to generate more efficient code.
536536
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
537537
CGBuilderTy &Bld = CGF.Builder;
538-
unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
538+
unsigned LaneIDBits =
539+
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
539540
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
540541
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
541542
}
@@ -545,8 +546,9 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
545546
/// on the NVPTX device, to generate more efficient code.
546547
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
547548
CGBuilderTy &Bld = CGF.Builder;
548-
unsigned LaneIDMask =
549-
CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
549+
unsigned LaneIDBits =
550+
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
551+
unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
550552
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
551553
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
552554
"nvptx_lane_id");

llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Lines changed: 23 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,13 @@ struct GV {
6262
const unsigned GV_Slot_Size;
6363
/// The default value of maximum number of threads in a worker warp.
6464
const unsigned GV_Warp_Size;
65-
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
66-
/// for NVPTX.
67-
const unsigned GV_Warp_Size_32;
68-
/// The number of bits required to represent the max number of threads in warp
69-
const unsigned GV_Warp_Size_Log2;
70-
/// GV_Warp_Size * GV_Slot_Size,
71-
const unsigned GV_Warp_Slot_Size;
65+
66+
constexpr unsigned warpSlotSize() const {
67+
return GV_Warp_Size * GV_Slot_Size;
68+
}
69+
7270
/// the maximum number of teams.
7371
const unsigned GV_Max_Teams;
74-
/// Global Memory Alignment
75-
const unsigned GV_Mem_Align;
76-
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
77-
const unsigned GV_Warp_Size_Log2_Mask;
7872
// An alternative to the heavy data sharing infrastructure that uses global
7973
// memory is one that uses device __shared__ memory. The amount of such space
8074
// (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,47 +77,32 @@ struct GV {
8377
const unsigned GV_Max_WG_Size;
8478
// The default maximum team size for a working group
8579
const unsigned GV_Default_WG_Size;
86-
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
87-
const unsigned GV_Max_Warp_Number;
88-
/// The slot size that should be reserved for a working warp.
89-
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
90-
const unsigned GV_Warp_Size_Log2_MaskL;
80+
81+
constexpr unsigned maxWarpNumber() const {
82+
return GV_Max_WG_Size / GV_Warp_Size;
83+
}
9184
};
9285

9386
/// For AMDGPU GPUs
9487
static constexpr GV AMDGPUGridValues = {
95-
448, // GV_Threads
96-
256, // GV_Slot_Size
97-
64, // GV_Warp_Size
98-
32, // GV_Warp_Size_32
99-
6, // GV_Warp_Size_Log2
100-
64 * 256, // GV_Warp_Slot_Size
101-
128, // GV_Max_Teams
102-
256, // GV_Mem_Align
103-
63, // GV_Warp_Size_Log2_Mask
104-
896, // GV_SimpleBufferSize
105-
1024, // GV_Max_WG_Size,
106-
256, // GV_Defaut_WG_Size
107-
1024 / 64, // GV_Max_WG_Size / GV_WarpSize
108-
63 // GV_Warp_Size_Log2_MaskL
88+
448, // GV_Threads
89+
256, // GV_Slot_Size
90+
64, // GV_Warp_Size
91+
128, // GV_Max_Teams
92+
896, // GV_SimpleBufferSize
93+
1024, // GV_Max_WG_Size,
94+
256, // GV_Default_WG_Size
10995
};
11096

11197
/// For Nvidia GPUs
11298
static constexpr GV NVPTXGridValues = {
113-
992, // GV_Threads
114-
256, // GV_Slot_Size
115-
32, // GV_Warp_Size
116-
32, // GV_Warp_Size_32
117-
5, // GV_Warp_Size_Log2
118-
32 * 256, // GV_Warp_Slot_Size
119-
1024, // GV_Max_Teams
120-
256, // GV_Mem_Align
121-
(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
122-
896, // GV_SimpleBufferSize
123-
1024, // GV_Max_WG_Size
124-
128, // GV_Defaut_WG_Size
125-
1024 / 32, // GV_Max_WG_Size / GV_WarpSize
126-
31 // GV_Warp_Size_Log2_MaskL
99+
992, // GV_Threads
100+
256, // GV_Slot_Size
101+
32, // GV_Warp_Size
102+
1024, // GV_Max_Teams
103+
896, // GV_SimpleBufferSize
104+
1024, // GV_Max_WG_Size
105+
128, // GV_Default_WG_Size
127106
};
128107

129108
} // namespace omp

0 commit comments

Comments
 (0)