Skip to content

Commit 4dcfcd3

Browse files
authored
[CIR] Upstream ShuffleDynamicOp for VectorType (#141411)
This change adds support for the dynamic shuffle op (`cir.vec.shuffle.dynamic`) for VectorType. Issue: #136487
1 parent 8eb4ada commit 4dcfcd3

File tree

9 files changed

+340
-1
lines changed

9 files changed

+340
-1
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2141,4 +2141,38 @@ def VecCmpOp : CIR_Op<"vec.cmp", [Pure, SameTypeOperands]> {
21412141
}];
21422142
}
21432143

2144+
//===----------------------------------------------------------------------===//
2145+
// VecShuffleDynamicOp
2146+
//===----------------------------------------------------------------------===//
2147+
2148+
// Shuffle with runtime indices. `AllTypesMatch` ties the type of `result` to
// the type of `vec`, which is why the assembly format below only spells out
// the two operand types.
def VecShuffleDynamicOp : CIR_Op<"vec.shuffle.dynamic",
2149+
[Pure, AllTypesMatch<["vec", "result"]>]> {
2150+
let summary = "Shuffle a vector using indices in another vector";
2151+
let description = [{
2152+
The `cir.vec.shuffle.dynamic` operation implements the undocumented form of
2153+
Clang's __builtin_shufflevector, where the indices of the shuffled result
2154+
can be runtime values.
2155+
2156+
There are two input vectors, which must have the same number of elements.
2157+
The second input vector must have an integral element type. The elements of
2158+
the second vector are interpreted as indices into the first vector. The
2159+
result vector is constructed by taking the elements from the first input
2160+
vector from the indices indicated by the elements of the second vector.
2161+
2162+
```mlir
2163+
%new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices
2164+
: !cir.vector<4 x !s32i>
2165+
```
2166+
}];
2167+
2168+
// `vec` is the vector being shuffled; `indices` must be a vector of integers.
let arguments = (ins CIR_VectorType:$vec, IntegerVector:$indices);
2169+
let results = (outs CIR_VectorType:$result);
2170+
let assemblyFormat = [{
2171+
$vec `:` qualified(type($vec)) `,` $indices `:` qualified(type($indices))
2172+
attr-dict
2173+
}];
2174+
2175+
// The hand-written verifier checks that `vec` and `indices` have the same
// number of elements.
let hasVerifier = 1;
2176+
}
2177+
21442178
#endif // CLANG_CIR_DIALECT_IR_CIROPS_TD

clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,20 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> {
171171
return emitLoadOfLValue(e);
172172
}
173173

174+
/// Emits CIR for a `ShuffleVectorExpr`.
///
/// Only the two-operand (undocumented) form of __builtin_shufflevector is
/// handled: it becomes a `cir::VecShuffleDynamicOp` built from the visited
/// input vector and index vector. Every other operand count is reported via
/// `errorNYI` and a default-constructed `mlir::Value` is returned.
mlir::Value VisitShuffleVectorExpr(ShuffleVectorExpr *e) {
  if (e->getNumSubExprs() != 2) {
    cgf.getCIRGenModule().errorNYI(e->getSourceRange(),
                                   "ShuffleVectorExpr with indices");
    return {};
  }

  // Operand 0 is the vector being shuffled, operand 1 carries the indices.
  // The two visits stay separate statements so they run in operand order.
  mlir::Value shuffledVec = Visit(e->getExpr(0));
  mlir::Value runtimeIndices = Visit(e->getExpr(1));
  return cgf.builder.create<cir::VecShuffleDynamicOp>(
      cgf.getLoc(e->getSourceRange()), shuffledVec, runtimeIndices);
}
187+
174188
mlir::Value VisitMemberExpr(MemberExpr *e);
175189

176190
mlir::Value VisitInitListExpr(InitListExpr *e);

clang/lib/CIR/Dialect/IR/CIRDialect.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1564,6 +1564,20 @@ OpFoldResult cir::VecExtractOp::fold(FoldAdaptor adaptor) {
15641564
return elements[index];
15651565
}
15661566

1567+
//===----------------------------------------------------------------------===//
1568+
// VecShuffleDynamicOp
1569+
//===----------------------------------------------------------------------===//
1570+
1571+
/// Verifies that the shuffled vector and the index vector have the same
/// number of elements. On mismatch an op error naming both types is emitted.
LogicalResult cir::VecShuffleDynamicOp::verify() {
  const auto vecElementCount = getVec().getType().getSize();
  const auto indexElementCount =
      mlir::cast<cir::VectorType>(getIndices().getType()).getSize();

  if (vecElementCount == indexElementCount)
    return success();

  return emitOpError() << ": the number of elements in " << getVec().getType()
                       << " and " << getIndices().getType() << " don't match";
}
1580+
15671581
//===----------------------------------------------------------------------===//
15681582
// TableGen'd op method definitions
15691583
//===----------------------------------------------------------------------===//

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1709,7 +1709,8 @@ void ConvertCIRToLLVMPass::runOnOperation() {
17091709
CIRToLLVMVecCreateOpLowering,
17101710
CIRToLLVMVecExtractOpLowering,
17111711
CIRToLLVMVecInsertOpLowering,
1712-
CIRToLLVMVecCmpOpLowering
1712+
CIRToLLVMVecCmpOpLowering,
1713+
CIRToLLVMVecShuffleDynamicOpLowering
17131714
// clang-format on
17141715
>(converter, patterns.getContext());
17151716

@@ -1863,6 +1864,60 @@ mlir::LogicalResult CIRToLLVMVecCmpOpLowering::matchAndRewrite(
18631864
return mlir::success();
18641865
}
18651866

1867+
/// Lowers `cir.vec.shuffle.dynamic` to LLVM dialect operations.
///
/// LLVM IR has no single operation for the runtime-index form
/// `__builtin_shufflevector(V, I)`, so it is expanded following this
/// pseudocode, with the loop fully unrolled and N the number of elements:
///
///   result   = undef
///   maskbits = NextPowerOf2(N - 1) - 1
///   masked   = I & maskbits
///   for (i in 0 <= i < N)
///     result[i] = V[masked[i]]
mlir::LogicalResult CIRToLLVMVecShuffleDynamicOpLowering::matchAndRewrite(
    cir::VecShuffleDynamicOp op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  const mlir::Location loc = op.getLoc();
  const auto *converter = getTypeConverter();

  mlir::Value srcVec = adaptor.getVec();
  mlir::Type idxVecTy = converter->convertType(op.getIndices().getType());
  mlir::Type idxElemTy =
      converter->convertType(elementTypeIfVector(op.getIndices().getType()));
  const uint64_t laneCount = op.getVec().getType().getSize();

  // Builds the i64 constant used as an insert/extract position.
  auto makeLaneConstant = [&](uint64_t lane) -> mlir::Value {
    return rewriter.create<mlir::LLVM::ConstantOp>(loc, rewriter.getI64Type(),
                                                   lane);
  };

  // Low-bit mask applied to every index, splatted lane by lane into a vector
  // of the index vector's converted type.
  const uint64_t lowBits = llvm::NextPowerOf2(laneCount - 1) - 1;
  mlir::Value lowBitsScalar = rewriter.create<mlir::LLVM::ConstantOp>(
      loc, idxElemTy, rewriter.getIntegerAttr(idxElemTy, lowBits));
  mlir::Value lowBitsSplat =
      rewriter.create<mlir::LLVM::UndefOp>(loc, idxVecTy);
  for (uint64_t lane = 0; lane != laneCount; ++lane) {
    mlir::Value position = makeLaneConstant(lane);
    lowBitsSplat = rewriter.create<mlir::LLVM::InsertElementOp>(
        loc, lowBitsSplat, lowBitsScalar, position);
  }

  mlir::Value maskedIdx = rewriter.create<mlir::LLVM::AndOp>(
      loc, idxVecTy, adaptor.getIndices(), lowBitsSplat);

  // Assemble the result one lane at a time: read the masked index for the
  // lane, fetch that element from the source vector, and insert it.
  mlir::Value shuffled = rewriter.create<mlir::LLVM::UndefOp>(
      loc, converter->convertType(op.getVec().getType()));
  for (uint64_t lane = 0; lane != laneCount; ++lane) {
    mlir::Value position = makeLaneConstant(lane);
    mlir::Value srcIndex = rewriter.create<mlir::LLVM::ExtractElementOp>(
        loc, maskedIdx, position);
    mlir::Value element =
        rewriter.create<mlir::LLVM::ExtractElementOp>(loc, srcVec, srcIndex);
    shuffled = rewriter.create<mlir::LLVM::InsertElementOp>(loc, shuffled,
                                                            element, position);
  }

  rewriter.replaceOp(op, shuffled);
  return mlir::success();
}
1920+
18661921
std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() {
18671922
return std::make_unique<ConvertCIRToLLVMPass>();
18681923
}

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,17 @@ class CIRToLLVMVecCmpOpLowering
352352
mlir::ConversionPatternRewriter &) const override;
353353
};
354354

355+
class CIRToLLVMVecShuffleDynamicOpLowering
356+
: public mlir::OpConversionPattern<cir::VecShuffleDynamicOp> {
357+
public:
358+
using mlir::OpConversionPattern<
359+
cir::VecShuffleDynamicOp>::OpConversionPattern;
360+
361+
mlir::LogicalResult
362+
matchAndRewrite(cir::VecShuffleDynamicOp op, OpAdaptor,
363+
mlir::ConversionPatternRewriter &) const override;
364+
};
365+
355366
} // namespace direct
356367
} // namespace cir
357368

clang/test/CIR/CodeGen/vector-ext.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
77

88
typedef int vi4 __attribute__((ext_vector_type(4)));
9+
typedef int vi6 __attribute__((ext_vector_type(6)));
910
typedef unsigned int uvi4 __attribute__((ext_vector_type(4)));
1011
typedef int vi3 __attribute__((ext_vector_type(3)));
1112
typedef int vi2 __attribute__((ext_vector_type(2)));
@@ -988,3 +989,87 @@ void foo14() {
988989
// OGCG: %[[TMP_B:.*]] = load <4 x float>, ptr %[[VEC_B]], align 16
989990
// OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]]
990991
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32>
992+
993+
// Two-operand __builtin_shufflevector on ext_vector_type(4) values: the
// elements of `b` act as runtime indices into `a`.
void foo15() {
994+
vi4 a;
995+
vi4 b;
996+
vi4 r = __builtin_shufflevector(a, b);
997+
}
998+
999+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
1000+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
1001+
// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i>
1002+
1003+
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
1004+
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
1005+
// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
1006+
// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
1007+
// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1008+
// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
1009+
// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
1010+
// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1011+
// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1012+
// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
1013+
// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1014+
// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1015+
// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
1016+
// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1017+
// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
1018+
1019+
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
1020+
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
1021+
// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
1022+
// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
1023+
// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1024+
// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
1025+
// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
1026+
// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1027+
// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1028+
// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
1029+
// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1030+
// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1031+
// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
1032+
// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1033+
// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
1034+
1035+
// Same two-operand shuffle on a 6-element vector. 6 is not a power of two,
// and the checks that follow expect the indices to be masked with 7.
void foo16() {
1036+
vi6 a;
1037+
vi6 b;
1038+
vi6 r = __builtin_shufflevector(a, b);
1039+
}
1040+
1041+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
1042+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
1043+
// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i>
1044+
1045+
// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1046+
// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1047+
// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
1048+
// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
1049+
// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1050+
// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
1051+
// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
1052+
// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1053+
// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1054+
// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
1055+
// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1056+
// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1057+
// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
1058+
// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1059+
// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
1060+
1061+
// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1062+
// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1063+
// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
1064+
// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
1065+
// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1066+
// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
1067+
// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
1068+
// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1069+
// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1070+
// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
1071+
// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1072+
// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1073+
// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
1074+
// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1075+
// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3

clang/test/CIR/CodeGen/vector.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
// RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
77

88
typedef int vi4 __attribute__((vector_size(16)));
9+
typedef int vi6 __attribute__((vector_size(24)));
910
typedef unsigned int uvi4 __attribute__((vector_size(16)));
1011
typedef float vf4 __attribute__((vector_size(16)));
1112
typedef double vd2 __attribute__((vector_size(16)));
@@ -967,3 +968,87 @@ void foo14() {
967968
// OGCG: %[[GE:.*]] = fcmp oge <4 x float> %[[TMP_A]], %[[TMP_B]]
968969
// OGCG: %[[RES:.*]] = sext <4 x i1> %[[GE]] to <4 x i32>
969970
// OGCG: store <4 x i32> %[[RES]], ptr {{.*}}, align 16
971+
972+
// Two-operand __builtin_shufflevector on vector_size(16) int vectors: the
// elements of `b` act as runtime indices into `a`.
void foo15() {
973+
vi4 a;
974+
vi4 b;
975+
vi4 r = __builtin_shufflevector(a, b);
976+
}
977+
978+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
979+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<4 x !s32i>>, !cir.vector<4 x !s32i>
980+
// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<4 x !s32i>, %[[TMP_B]] : !cir.vector<4 x !s32i>
981+
982+
// LLVM: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
983+
// LLVM: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
984+
// LLVM: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
985+
// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
986+
// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
987+
// LLVM: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
988+
// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
989+
// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
990+
// LLVM: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
991+
// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
992+
// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
993+
// LLVM: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
994+
// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
995+
// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
996+
// LLVM: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
997+
998+
// OGCG: %[[TMP_A:.*]] = load <4 x i32>, ptr {{.*}}, align 16
999+
// OGCG: %[[TMP_B:.*]] = load <4 x i32>, ptr {{.*}}, align 16
1000+
// OGCG: %[[MASK:.*]] = and <4 x i32> %[[TMP_B]], splat (i32 3)
1001+
// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <4 x i32> %[[MASK]], i64 0
1002+
// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1003+
// OGCG: %[[SHUF_INS_0:.*]] = insertelement <4 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
1004+
// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <4 x i32> %[[MASK]], i64 1
1005+
// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1006+
// OGCG: %[[SHUF_INS_1:.*]] = insertelement <4 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1007+
// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <4 x i32> %[[MASK]], i64 2
1008+
// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1009+
// OGCG: %[[SHUF_INS_2:.*]] = insertelement <4 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1010+
// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <4 x i32> %[[MASK]], i64 3
1011+
// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <4 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1012+
// OGCG: %[[SHUF_INS_3:.*]] = insertelement <4 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
1013+
1014+
// Same two-operand shuffle on a vector_size(24) int vector (6 elements). 6 is
// not a power of two, and the checks that follow expect a mask of 7.
void foo16() {
1015+
vi6 a;
1016+
vi6 b;
1017+
vi6 r = __builtin_shufflevector(a, b);
1018+
}
1019+
1020+
// CIR: %[[TMP_A:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
1021+
// CIR: %[[TMP_B:.*]] = cir.load{{.*}} {{.*}} : !cir.ptr<!cir.vector<6 x !s32i>>, !cir.vector<6 x !s32i>
1022+
// CIR: %[[NEW_VEC:.*]] = cir.vec.shuffle.dynamic %[[TMP_A]] : !cir.vector<6 x !s32i>, %[[TMP_B]] : !cir.vector<6 x !s32i>
1023+
1024+
// LLVM: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1025+
// LLVM: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1026+
// LLVM: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
1027+
// LLVM: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
1028+
// LLVM: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1029+
// LLVM: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> undef, i32 %[[SHUF_ELE_0]], i64 0
1030+
// LLVM: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
1031+
// LLVM: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1032+
// LLVM: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1033+
// LLVM: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
1034+
// LLVM: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1035+
// LLVM: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1036+
// LLVM: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
1037+
// LLVM: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1038+
// LLVM: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
1039+
1040+
// OGCG: %[[TMP_A:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1041+
// OGCG: %[[TMP_B:.*]] = load <6 x i32>, ptr {{.*}}, align 32
1042+
// OGCG: %[[MASK:.*]] = and <6 x i32> %[[TMP_B]], splat (i32 7)
1043+
// OGCG: %[[SHUF_IDX_0:.*]] = extractelement <6 x i32> %[[MASK]], i64 0
1044+
// OGCG: %[[SHUF_ELE_0:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_0]]
1045+
// OGCG: %[[SHUF_INS_0:.*]] = insertelement <6 x i32> poison, i32 %[[SHUF_ELE_0]], i64 0
1046+
// OGCG: %[[SHUF_IDX_1:.*]] = extractelement <6 x i32> %[[MASK]], i64 1
1047+
// OGCG: %[[SHUF_ELE_1:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_1]]
1048+
// OGCG: %[[SHUF_INS_1:.*]] = insertelement <6 x i32> %[[SHUF_INS_0]], i32 %[[SHUF_ELE_1]], i64 1
1049+
// OGCG: %[[SHUF_IDX_2:.*]] = extractelement <6 x i32> %[[MASK]], i64 2
1050+
// OGCG: %[[SHUF_ELE_2:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_2]]
1051+
// OGCG: %[[SHUF_INS_2:.*]] = insertelement <6 x i32> %[[SHUF_INS_1]], i32 %[[SHUF_ELE_2]], i64 2
1052+
// OGCG: %[[SHUF_IDX_3:.*]] = extractelement <6 x i32> %[[MASK]], i64 3
1053+
// OGCG: %[[SHUF_ELE_3:.*]] = extractelement <6 x i32> %[[TMP_A]], i32 %[[SHUF_IDX_3]]
1054+
// OGCG: %[[SHUF_INS_3:.*]] = insertelement <6 x i32> %[[SHUF_INS_2]], i32 %[[SHUF_ELE_3]], i64 3
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// RUN: cir-opt %s -verify-diagnostics -split-input-file
2+
3+
!s32i = !cir.int<s, 32>
4+
!s64i = !cir.int<s, 64>
5+
6+
// Negative test: the index vector has 2 elements while the shuffled vector
// has 4, so cir.vec.shuffle.dynamic must be rejected by its verifier.
module {
7+
cir.func @foo() {
8+
%1 = cir.const #cir.int<1> : !s32i
9+
%2 = cir.const #cir.int<2> : !s32i
10+
%3 = cir.const #cir.int<3> : !s32i
11+
%4 = cir.const #cir.int<4> : !s32i
12+
%vec = cir.vec.create(%1, %2, %3, %4 : !s32i, !s32i, !s32i, !s32i) : !cir.vector<4 x !s32i>
13+
%indices = cir.vec.create(%1, %2 : !s32i, !s32i) : !cir.vector<2 x !s32i>
14+
15+
// expected-error @below {{the number of elements in '!cir.vector<4 x !cir.int<s, 32>>' and '!cir.vector<2 x !cir.int<s, 32>>' don't match}}
16+
%new_vec = cir.vec.shuffle.dynamic %vec : !cir.vector<4 x !s32i>, %indices : !cir.vector<2 x !s32i>
17+
cir.return
18+
}
19+
}

0 commit comments

Comments
 (0)