[openmp][nfc] Refactor GridValues

JonChesterfield · JonChesterfield · commit 2a47a84b4011 · 2021-08-20T16:41:26.000+01:00
Remove redundant fields and replace pointer with virtual function Of fourteen fields, three are dead and four can be computed from the remainder. This leaves a couple of currently dead fields in place as they are expected to be used from the deviceRTL shortly. Two of the fields that can be computed are only used from codegen and require a log2() implementation so are inlined into codegen instead. This change leaves the new methods in the same location in the struct as the previous fields for convenience at review. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D108380
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
@@ -210,9 +210,6 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   unsigned char RegParmMax, SSERegParmMax;
   TargetCXXABI TheCXXABI;
   const LangASMap *AddrSpaceMap;
-  const llvm::omp::GV *GridValues =
-      nullptr; // target-specific GPU grid values that must be
-               // consistent between host RTL (plugin), device RTL, and clang.
 
   mutable StringRef PlatformName;
   mutable VersionTuple PlatformMinVersion;
@@ -1410,10 +1407,10 @@ class TargetInfo : public virtual TransferrableTargetInfo,
     return LangAS::Default;
   }
 
-  /// Return a target-specific GPU grid values
-  const llvm::omp::GV &getGridValue() const {
-    assert(GridValues != nullptr && "GridValues not initialized");
-    return *GridValues;
+  // access target-specific GPU grid values that must be consistent between
+  // host RTL (plugin), deviceRTL and clang.
+  virtual const llvm::omp::GV &getGridValue() const {
+    llvm_unreachable("getGridValue not implemented on this target");
   }
 
   /// Retrieve the name of the platform as it is used in the
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,7 +17,6 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -335,7 +334,6 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                         : DataLayoutStringR600);
-  GridValues = &llvm::omp::AMDGPUGridValues;
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                      !isAMDGCN(Triple));
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
@@ -370,6 +370,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
     return getLangASFromTargetAS(Constant);
   }
 
+  const llvm::omp::GV &getGridValue() const override {
+    return llvm::omp::AMDGPUGridValues;
+  }
+
   /// \returns Target specific vtbl ptr address space.
   unsigned getVtblPtrAddressSpace() const override {
     return static_cast<unsigned>(Constant);
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -16,7 +16,6 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -65,7 +64,6 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   TLSSupported = false;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
-  GridValues = &llvm::omp::NVPTXGridValues;
   UseAddrSpaceMapMangling = true;
 
   // Define available target features
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
@@ -147,6 +147,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
     Opts["cl_khr_local_int32_extended_atomics"] = true;
   }
 
+  const llvm::omp::GV &getGridValue() const override {
+    return llvm::omp::NVPTXGridValues;
+  }
+
   /// \returns If a target requires an address within a target specific address
   /// space \p AddressSpace to be converted in order to be used, then return the
   /// corresponding target specific DWARF address space.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/Support/MathExtras.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -106,8 +107,7 @@ class ExecutionRuntimeModesRAII {
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
   /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
-  /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
-  /// and GV_Warp_Size_Log2_Mask.
+  /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@@ -535,7 +535,8 @@ class CheckVarsEscapingDeclContext final
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
-  unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
+  unsigned LaneIDBits =
+      llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
   return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
 }
@@ -545,8 +546,9 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
-  unsigned LaneIDMask =
-      CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
+  unsigned LaneIDBits =
+      llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
+  unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
   auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
   return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
                        "nvptx_lane_id");
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,19 +62,13 @@ struct GV {
   const unsigned GV_Slot_Size;
   /// The default value of maximum number of threads in a worker warp.
   const unsigned GV_Warp_Size;
-  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
-  /// for NVPTX.
-  const unsigned GV_Warp_Size_32;
-  /// The number of bits required to represent the max number of threads in warp
-  const unsigned GV_Warp_Size_Log2;
-  /// GV_Warp_Size * GV_Slot_Size,
-  const unsigned GV_Warp_Slot_Size;
+
+  constexpr unsigned warpSlotSize() const {
+    return GV_Warp_Size * GV_Slot_Size;
+  }
+
   /// the maximum number of teams.
   const unsigned GV_Max_Teams;
-  /// Global Memory Alignment
-  const unsigned GV_Mem_Align;
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_Mask;
   // An alternative to the heavy data sharing infrastructure that uses global
   // memory is one that uses device __shared__ memory.  The amount of such space
   // (in bytes) reserved by the OpenMP runtime is noted here.
@@ -83,47 +77,32 @@ struct GV {
   const unsigned GV_Max_WG_Size;
   // The default maximum team size for a working group
   const unsigned GV_Default_WG_Size;
-  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
-  const unsigned GV_Max_Warp_Number;
-  /// The slot size that should be reserved for a working warp.
-  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
-  const unsigned GV_Warp_Size_Log2_MaskL;
+
+  constexpr unsigned maxWarpNumber() const {
+    return GV_Max_WG_Size / GV_Warp_Size;
+  }
 };
 
 /// For AMDGPU GPUs
 static constexpr GV AMDGPUGridValues = {
-    448,       // GV_Threads
-    256,       // GV_Slot_Size
-    64,        // GV_Warp_Size
-    32,        // GV_Warp_Size_32
-    6,         // GV_Warp_Size_Log2
-    64 * 256,  // GV_Warp_Slot_Size
-    128,       // GV_Max_Teams
-    256,       // GV_Mem_Align
-    63,        // GV_Warp_Size_Log2_Mask
-    896,       // GV_SimpleBufferSize
-    1024,      // GV_Max_WG_Size,
-    256,       // GV_Defaut_WG_Size
-    1024 / 64, // GV_Max_WG_Size / GV_WarpSize
-    63         // GV_Warp_Size_Log2_MaskL
+    448,  // GV_Threads
+    256,  // GV_Slot_Size
+    64,   // GV_Warp_Size
+    128,  // GV_Max_Teams
+    896,  // GV_SimpleBufferSize
+    1024, // GV_Max_WG_Size,
+    256,  // GV_Default_WG_Size
 };
 
 /// For Nvidia GPUs
 static constexpr GV NVPTXGridValues = {
-    992,               // GV_Threads
-    256,               // GV_Slot_Size
-    32,                // GV_Warp_Size
-    32,                // GV_Warp_Size_32
-    5,                 // GV_Warp_Size_Log2
-    32 * 256,          // GV_Warp_Slot_Size
-    1024,              // GV_Max_Teams
-    256,               // GV_Mem_Align
-    (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
-    896,               // GV_SimpleBufferSize
-    1024,              // GV_Max_WG_Size
-    128,               // GV_Defaut_WG_Size
-    1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
-    31                 // GV_Warp_Size_Log2_MaskL
+    992,  // GV_Threads
+    256,  // GV_Slot_Size
+    32,   // GV_Warp_Size
+    1024, // GV_Max_Teams
+    896,  // GV_SimpleBufferSize
+    1024, // GV_Max_WG_Size
+    128,  // GV_Default_WG_Size
 };
 
 } // namespace omp