[Matrix] Optimize static extracts with ShapeInfo #141815

jroelofs · 2025-05-28T18:16:13Z

For ExtractElementInsts with static indices that extract from a Matrix, use the known layout of the Rows/Columns to look through the shuffles that embedInVector creates, which in some cases allows us to delete them.

llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll

github-actions · 2025-05-28T18:19:32Z

✅ With the latest revision this PR passed the C/C++ code formatter.

For ExtractElementInsts with static indices that extract from a Matrix, use the known layout of the Rows/Columns, avoiding some of the shuffles that embedInVector creates.

llvmbot · 2025-05-28T19:05:22Z

@llvm/pr-subscribers-llvm-transforms

Author: Jon Roelofs (jroelofs)

Changes

For ExtractElementInsts with static indices that extract from a Matrix, use the known layout of the Rows/Columns to look through the shuffles that embedInVector creates, which in some cases allows us to delete them.

Full diff: https://github.com/llvm/llvm-project/pull/141815.diff

2 Files Affected:

(modified) llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp (+37-6)
(added) llvm/test/Transforms/LowerMatrixIntrinsics/extract.ll (+47)

diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 756a72e6d97bc..8b322afd9b6e4 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -34,6 +34,7 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MatrixBuilder.h"
@@ -623,7 +624,8 @@ class LowerMatrixIntrinsics {
       default:
         return false;
       }
-    return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V);
+    return isUniformShape(V) || isa<StoreInst>(V) || isa<LoadInst>(V) ||
+           isa<ExtractElementInst>(V);
   }
 
   /// Propagate the shape information of instructions to their users.
@@ -1337,6 +1339,28 @@ class LowerMatrixIntrinsics {
     return Builder.CreateAdd(Sum, Mul);
   }
 
+  bool VisitExtractElt(ExtractElementInst *Inst, uint64_t Index) {
+    Value *Op0 = Inst->getOperand(0);
+    auto *VTy = cast<VectorType>(Op0->getType());
+
+    if (VTy->getElementCount().getKnownMinValue() < Index) {
+      Inst->replaceAllUsesWith(PoisonValue::get(VTy->getElementType()));
+      Inst->eraseFromParent();
+      return true;
+    }
+
+    auto *I = Inst2ColumnMatrix.find(Op0);
+    if (I == Inst2ColumnMatrix.end())
+      return false;
+
+    const MatrixTy &M = I->second;
+
+    IRBuilder<> Builder(Inst);
+    Inst->setOperand(0, M.getVector(Index / M.getStride()));
+    Inst->setOperand(1, Builder.getInt32(Index % M.getStride()));
+    return true;
+  }
+
   /// Cache \p Matrix as result of \p Inst and update the uses of \p Inst. For
   /// users with shape information, there's nothing to do: they will use the
   /// cached value when they are lowered. For other users, \p Matrix is
@@ -1351,11 +1375,18 @@ class LowerMatrixIntrinsics {
     ToRemove.push_back(Inst);
     Value *Flattened = nullptr;
     for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
-      if (!ShapeMap.contains(U.getUser())) {
-        if (!Flattened)
-          Flattened = Matrix.embedInVector(Builder);
-        U.set(Flattened);
-      }
+      if (ShapeMap.contains(U.getUser()))
+        continue;
+
+      Value *Op1;
+      uint64_t Index;
+      if (match(U.getUser(), m_ExtractElt(m_Value(Op1), m_ConstantInt(Index))))
+        if (VisitExtractElt(cast<ExtractElementInst>(U.getUser()), Index))
+          continue;
+
+      if (!Flattened)
+        Flattened = Matrix.embedInVector(Builder);
+      U.set(Flattened);
     }
   }
 
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/extract.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/extract.ll
new file mode 100644
index 0000000000000..0bac9492d654a
--- /dev/null
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/extract.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s
+
+define float @extract_static(ptr %in, ptr %out) {
+; CHECK-LABEL: @extract_static(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[IN:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <2 x float> [[COL_LOAD1]], i32 1
+; CHECK-NEXT:    ret float [[EXTRACT]]
+;
+  %inv = load <4 x float>, ptr %in
+  %invt  = call <4 x float> @llvm.matrix.transpose(<4 x float> %inv, i32 2, i32 2)
+  %invtt = call <4 x float> @llvm.matrix.transpose(<4 x float> %invt, i32 2, i32 2)
+  %extract = extractelement <4 x float> %invtt, i32 3
+  ret float %extract
+}
+
+define float @extract_static_outofbounds(ptr %in, ptr %out) {
+; CHECK-LABEL: @extract_static_outofbounds(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[IN:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    ret float poison
+;
+  %inv = load <4 x float>, ptr %in
+  %invt  = call <4 x float> @llvm.matrix.transpose(<4 x float> %inv, i32 2, i32 2)
+  %invtt = call <4 x float> @llvm.matrix.transpose(<4 x float> %invt, i32 2, i32 2)
+  %extract = extractelement <4 x float> %invtt, i32 5
+  ret float %extract
+}
+
+define float @extract_dynamic(ptr %in, i32 %idx, ptr %out) {
+; CHECK-LABEL: @extract_dynamic(
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, ptr [[IN:%.*]], align 16
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 2
+; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 [[IDX:%.*]]
+; CHECK-NEXT:    ret float [[EXTRACT]]
+;
+  %inv = load <4 x float>, ptr %in
+  %invt  = call <4 x float> @llvm.matrix.transpose(<4 x float> %inv, i32 2, i32 2)
+  %invtt = call <4 x float> @llvm.matrix.transpose(<4 x float> %invt, i32 2, i32 2)
+  %extract = extractelement <4 x float> %invtt, i32 %idx
+  ret float %extract
+}

jroelofs requested review from fhahn and anemet May 28, 2025 18:16

jroelofs commented May 28, 2025

View reviewed changes

llvm/test/Transforms/LowerMatrixIntrinsics/transpose-opts.ll Outdated Show resolved Hide resolved

jroelofs force-pushed the jroelofs/lower-matrix-extract branch from 10a854e to ae5751f Compare May 28, 2025 19:02

[Matrix] Optimize static extracts with ShapeInfo

323c9a8

For ExtractElementInsts with static indices that extract from a Matrix, use the known layout of the Rows/Columns, avoiding some of the shuffles that embedInVector creates.

jroelofs force-pushed the jroelofs/lower-matrix-extract branch from ae5751f to 323c9a8 Compare May 28, 2025 19:03

jroelofs marked this pull request as ready for review May 28, 2025 19:04

llvmbot added the llvm:transforms label May 28, 2025

jroelofs added 3 commits May 29, 2025 11:07

use colum major load intrinsic for shape info

c962ce9

extractelemtn shouldn't supportShapeInfo

169ed56

refactor as tryLower

04f9707

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Matrix] Optimize static extracts with ShapeInfo #141815

[Matrix] Optimize static extracts with ShapeInfo #141815

jroelofs commented May 28, 2025

Uh oh!

Uh oh!

github-actions bot commented May 28, 2025 •

edited

Loading

Uh oh!

llvmbot commented May 28, 2025

Uh oh!

Uh oh!

[Matrix] Optimize static extracts with ShapeInfo #141815

Are you sure you want to change the base?

[Matrix] Optimize static extracts with ShapeInfo #141815

Conversation

jroelofs commented May 28, 2025

Uh oh!

Uh oh!

github-actions bot commented May 28, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 28, 2025

Uh oh!

Uh oh!

github-actions bot commented May 28, 2025 •

edited

Loading