diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 756a72e6d97bc..6c364f057481a 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -269,6 +269,15 @@ computeShapeInfoForInst(Instruction *I,
     return OpShape->second;
   }
 
+  if (isa<SelectInst>(I)) {
+    auto OpShape = ShapeMap.find(I->getOperand(1));
+    if (OpShape != ShapeMap.end())
+      return OpShape->second;
+    OpShape = ShapeMap.find(I->getOperand(2));
+    if (OpShape != ShapeMap.end())
+      return OpShape->second;
+  }
+
   if (isUniformShape(I)) {
     // Find the first operand that has a known shape and use that.
     for (auto &Op : I->operands()) {
@@ -623,7 +632,8 @@ class LowerMatrixIntrinsics {
       default:
        return false;
      }
-    return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
+    return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) ||
+           isa<SelectInst>(V);
   }
 
   /// Propagate the shape information of instructions to their users.
@@ -710,6 +720,12 @@ class LowerMatrixIntrinsics {
       } else if (isa<StoreInst>(V)) {
         // Nothing to do. We forward-propagated to this so we would just
         // backward propagate to an instruction with an already known shape.
+      } else if (auto *Select = dyn_cast<SelectInst>(V)) {
+        ShapeInfo Shape = ShapeMap[V];
+        if (setShapeInfo(Select->getOperand(1), Shape))
+          pushInstruction(Select, WorkList);
+        if (setShapeInfo(Select->getOperand(2), Shape))
+          pushInstruction(Select, WorkList);
       } else if (isUniformShape(V)) {
         // Propagate to all operands.
         ShapeInfo Shape = ShapeMap[V];
@@ -1068,6 +1084,8 @@ class LowerMatrixIntrinsics {
         Changed |= VisitBinaryOperator(BinOp);
       if (auto *UnOp = dyn_cast<UnaryOperator>(Inst))
         Changed |= VisitUnaryOperator(UnOp);
+      if (auto *Select = dyn_cast<SelectInst>(Inst))
+        Changed |= VisitSelectInst(Select);
       if (match(Inst, m_Load(m_Value(Op1))))
         Changed |= VisitLoad(cast<LoadInst>(Inst), Op1, Builder);
       else if (match(Inst, m_Store(m_Value(Op1), m_Value(Op2))))
@@ -2198,6 +2216,35 @@ class LowerMatrixIntrinsics {
     return true;
   }
 
+  /// Lower selects, if shape information is available.
+  bool VisitSelectInst(SelectInst *Inst) {
+    auto I = ShapeMap.find(Inst);
+    if (I == ShapeMap.end())
+      return false;
+
+    Value *Cond = Inst->getOperand(0);
+    Value *OpA = Inst->getOperand(1);
+    Value *OpB = Inst->getOperand(2);
+
+    IRBuilder<> Builder(Inst);
+    ShapeInfo &Shape = I->second;
+
+    MatrixTy Result;
+    MatrixTy A = getMatrix(OpA, Shape, Builder);
+    MatrixTy B = getMatrix(OpB, Shape, Builder);
+
+    for (unsigned I = 0; I < Shape.getNumVectors(); ++I) {
+      auto *Sel = Builder.CreateSelect(Cond, A.getVector(I), B.getVector(I));
+      Result.addVector(Sel);
+    }
+
+    finalizeLowering(Inst,
+                     Result.addNumComputeOps(getNumOps(Result.getVectorTy()) *
+                                             Result.getNumVectors()),
+                     Builder);
+    return true;
+  }
+
   /// Helper to linearize a matrix expression tree into a string. Currently
   /// matrix expressions are linarized by starting at an expression leaf and
   /// linearizing bottom up.
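In short: the pass now propagates matrix shape information through `select` instructions (forward via `computeShapeInfoForInst`, backward via the `ShapeMap` worklist), and `VisitSelectInst` lowers a shaped `select` into one `select` per column vector. A minimal hand-written sketch of the intended lowering for a 2x2 float matrix held as two `<2 x float>` columns (value names here are hypothetical; the autogenerated test below checks the real output):

  ; before lowering: one select over the flattened 4-element vector
  %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv

  ; after lowering: one select per column vector
  %op.col0 = select i1 %cond, <2 x float> %lhsv.col0, <2 x float> %rhsv.col0
  %op.col1 = select i1 %cond, <2 x float> %lhsv.col1, <2 x float> %rhsv.col1

Note that the same scalar `i1` condition guards every per-column `select`; the tests below only exercise scalar conditions.
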
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
new file mode 100644
index 0000000000000..507b02a04f47f
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/select.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+
+define void @select_2x2_bot(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
+; CHECK-LABEL: @select_2x2_bot(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]]
+; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 4
+; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr float, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP5]], align 4
+; CHECK-NEXT:    ret void
+;
+  %lhsv = load <4 x float>, ptr %lhs
+  %rhsv = load <4 x float>, ptr %rhs
+  %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv
+  call void @llvm.matrix.column.major.store(<4 x float> %op, ptr %out, i64 2, i1 false, i32 2, i32 2)
+  ret void
+}
+
+define void @select_2x2_lhs(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
+; CHECK-LABEL: @select_2x2_lhs(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[LHS:%.*]], align 4
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[LHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 4
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]]
+; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP5:%.*]] = getelementptr float, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP5]], align 8
+; CHECK-NEXT:    ret void
+;
+  %lhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %lhs, i64 2, i1 false, i32 2, i32 2)
+  %rhsv = load <4 x float>, ptr %rhs
+  %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv
+  store <4 x float> %op, ptr %out
+  ret void
+}
+
+define void @select_2x2_rhs(i1 %cond, ptr %lhs, ptr %rhs, ptr %out) {
+; CHECK-LABEL: @select_2x2_rhs(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[RHS:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[RHS]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x float>, ptr [[RHS1:%.*]], align 4
+; CHECK-NEXT:    [[VEC_GEP3:%.*]] = getelementptr float, ptr [[RHS1]], i64 2
+; CHECK-NEXT:    [[COL_LOAD4:%.*]] = load <2 x float>, ptr [[VEC_GEP3]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND:%.*]], <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[COND]], <2 x float> [[COL_LOAD1]], <2 x float> [[COL_LOAD4]]
+; CHECK-NEXT:    store <2 x float> [[TMP1]], ptr [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr float, ptr [[OUT]], i64 2
+; CHECK-NEXT:    store <2 x float> [[TMP2]], ptr [[VEC_GEP2]], align 8
+; CHECK-NEXT:    ret void
+;
+  %lhsv = load <4 x float>, ptr %lhs
+  %rhsv = call <4 x float> @llvm.matrix.column.major.load(ptr %rhs, i64 2, i1 false, i32 2, i32 2)
+  %op = select i1 %cond, <4 x float> %lhsv, <4 x float> %rhsv
+  store <4 x float> %op, ptr %out
+  ret void
+}
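A note on coverage, as implied by the propagation code above: @select_2x2_bot gets its shape inferred backward from the `llvm.matrix.column.major.store` user, while @select_2x2_lhs and @select_2x2_rhs infer it forward from a `llvm.matrix.column.major.load` feeding the true or false operand respectively, matching the two `ShapeMap` lookups added to `computeShapeInfoForInst`.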