Skip to content

Commit 0210750

Browse files
chencha3 and adam-smnk
authored
[MLIR][XeGPU] Add unroll patterns and blocking pass for XeGPU [2/N] (#140163)
This PR introduces the initial implementation of a blocking pass for XeGPU programs. The pass leverages unroll patterns from both the XeGPU and Vector dialects. --------- Co-authored-by: Adam Siemieniuk <adam.siemieniuk@intel.com>
1 parent 18e5131 commit 0210750

File tree

10 files changed

+965
-48
lines changed

10 files changed

+965
-48
lines changed

mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -295,11 +295,17 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> {
295295
}
296296

297297
// Rebuilds this layout with the sg layout and sg data arguments passed as
// nullptr; inst data, lane layout, lane data and order are carried over.
LayoutAttr dropSgLayoutAndData() {
298+
// Return nullptr when neither inst data nor lane layout is set, to avoid creating an attribute whose fields are all nullptr, which may lead to a segmentation fault.
299+
if (!getInstData() && !getLaneLayout())
300+
return nullptr;
298301
return LayoutAttr::get(getContext(), nullptr, nullptr, getInstData(),
299302
getLaneLayout(), getLaneData(), getOrder());
300303
}
301304

302305
// Rebuilds this layout with the inst data argument passed as nullptr; sg
// layout, sg data, lane layout, lane data and order are carried over.
LayoutAttr dropInstData() {
306+
// Return nullptr when neither sg layout nor lane layout is set, to avoid creating an attribute whose fields are all nullptr, which may lead to a segmentation fault.
307+
if (!getSgLayout() && !getLaneLayout())
308+
return nullptr;
303309
return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), nullptr,
304310
getLaneLayout(), getLaneData(), getOrder());
305311
}

mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,17 @@ def XeGPUWgToSgDistribute : Pass<"xegpu-wg-to-sg-distribute"> {
4545
"gpu::GPUDialect", "index::IndexDialect"];
4646
}
4747

48+
// Declares the `xegpu-blocking` pass together with its summary, description
// and the dialects it lists as dependent (memref, xegpu, vector).
def XeGPUBlocking: Pass<"xegpu-blocking"> {
49+
let summary = "Block XeGPU ops into smaller size.";
50+
let description = [{
51+
This pass partitions operations that process large shapes into multiple
52+
operations on smaller shapes, as specified by the inst_data in the layout
53+
attribute. This enables each resulting operation to be efficiently mapped
54+
to a hardware instruction.
55+
}];
56+
let dependentDialects = [
57+
"memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect"
58+
];
59+
}
60+
4861
#endif // MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD

mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,12 @@
1313
namespace mlir {
1414

1515
class VectorType;
16+
class OpOperand;
17+
class OpResult;
18+
class OpBuilder;
19+
class ValueRange;
20+
class TypeConverter;
21+
1622
namespace xegpu {
1723
class LayoutAttr;
1824
class TensorDescType;
@@ -50,6 +56,59 @@ FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy);
5056
FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
5157
LayoutAttr layout);
5258

59+
/// Return the attribute name for the OpOperand to attach LayoutAttr
60+
std::string getLayoutName(const OpOperand &operand);
61+
62+
/// Return the attribute name for the OpResult to attach LayoutAttr
63+
std::string getLayoutName(const OpResult result);
64+
65+
/// Retrieves the LayoutAttr associated with a given Value. For TensorDescType
66+
/// values, the LayoutAttr is extracted from the TensorDescType itself. For
67+
/// other values, it is obtained from the attributes of the defining operation.
68+
/// Returns nullptr if no LayoutAttr is found.
69+
LayoutAttr getLayoutAttr(const Value value);
70+
71+
/// Retrieves the LayoutAttr associated with a given OpOperand. It will
72+
/// first check the operand_layout_{id} of the owner operation. If not found,
73+
/// it will check the operand itself and its defining op.
74+
LayoutAttr getLayoutAttr(const OpOperand &opr);
75+
76+
/// Sets the LayoutAttr for a given OpOperand or OpResult by attaching
77+
/// it to the owner's dictionary attributes
78+
template <typename T,
79+
typename = std::enable_if_t<std::is_same_v<T, OpOperand> ||
80+
std::is_same_v<T, OpResult>>>
81+
void setLayoutAttr(const T &operandOrResult, const LayoutAttr layout);
82+
83+
/// Set the LayoutAttr for each OpOperand and OpResult of the given operation.
84+
/// If the operation contains regions, it is also applied recursively to the
85+
/// contained operations
86+
void setLayoutAttrs(Operation *op,
87+
function_ref<LayoutAttr(Value)> getLayoutImpl);
88+
89+
/// Extract a set of small vectors from a value with a given shape using
90+
/// vector.extract_strided_slice.
91+
SmallVector<Value> extractVectorsWithShapeFromValue(OpBuilder &builder,
92+
Location loc, Value value,
93+
ArrayRef<int64_t> shape);
94+
95+
/// Create a vector of shape from a set of values using
96+
/// vector.insert_strided_slice.
97+
Value createVectorWithShapeFromValues(OpBuilder &builder, Location loc,
98+
ValueRange values,
99+
ArrayRef<int64_t> shape);
100+
101+
/// Do type conversion for SCF structural ops, e.g., scf.for using SCF structure
102+
/// type conversion patterns. Since VectorType cannot carry the layout
103+
/// attribute, which is needed to guide the type conversion for XeGPU, they are
104+
/// first converted into RankedTensorType, where the layout attribute can be
105+
/// attached. And then upstream SCF structural type conversion patterns are
106+
/// applied with the provided converter.
107+
/// TODO: This is a temporary solution. We should refactor it when context-aware
108+
/// type conversion is available.
109+
void doSCFStructuralTypeConversionWithTensorType(Operation *op,
110+
TypeConverter converter);
111+
53112
} // namespace xegpu
54113

55114
} // namespace mlir

mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
add_mlir_dialect_library(MLIRXeGPUTransforms
2+
XeGPUBlocking.cpp
23
XeGPUFoldAliasOps.cpp
34
XeGPUSubgroupDistribute.cpp
45
XeGPUUnroll.cpp

0 commit comments

Comments
 (0)