|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | +; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s |
| 3 | + |
; extract_static: loads a <4 x float> from %in, applies @llvm.matrix.transpose
; twice with 2 x 2 dimensions, and returns element 0 (a constant index) of the
; twice-transposed value. The CHECK lines expect the lowered output to consist
; only of two <2 x float> column loads, a shufflevector that concatenates them
; back into a <4 x float>, and the extractelement at index 0 -- no transpose
; call remains between the label and the ret. %out is not used by the body.
| 4 | +define float @extract_static(ptr %in, ptr %out) { |
| 5 | +; CHECK-LABEL: @extract_static( |
| 6 | +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[IN:%.*]], align 16 |
| 7 | +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 2 |
| 8 | +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 |
| 9 | +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 10 | +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 0 |
| 11 | +; CHECK-NEXT: ret float [[EXTRACT]] |
| 12 | +; |
| 13 | + %inv = load <4 x float>, ptr %in |
| 14 | + %invt = call <4 x float> @llvm.matrix.transpose(<4 x float> %inv, i32 2, i32 2) |
| 15 | + %invtt = call <4 x float> @llvm.matrix.transpose(<4 x float> %invt, i32 2, i32 2) |
| 16 | + %extract = extractelement <4 x float> %invtt, i32 0 |
| 17 | + ret float %extract |
| 18 | +} |
| 19 | + |
; extract_dynamic: loads a <4 x float> from %in, applies @llvm.matrix.transpose
; twice with 2 x 2 dimensions, and returns the element selected by the runtime
; index %idx. The CHECK lines expect the lowered output to consist only of two
; <2 x float> column loads, a shufflevector that concatenates them back into a
; <4 x float>, and an extractelement indexed by the function's i32 argument --
; no transpose call remains between the label and the ret. %out is not used by
; the body.
| 20 | +define float @extract_dynamic(ptr %in, i32 %idx, ptr %out) { |
| 21 | +; CHECK-LABEL: @extract_dynamic( |
| 22 | +; CHECK-NEXT: [[COL_LOAD:%.*]] = load <2 x float>, ptr [[IN:%.*]], align 16 |
| 23 | +; CHECK-NEXT: [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 2 |
| 24 | +; CHECK-NEXT: [[COL_LOAD1:%.*]] = load <2 x float>, ptr [[VEC_GEP]], align 8 |
| 25 | +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x float> [[COL_LOAD]], <2 x float> [[COL_LOAD1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> |
| 26 | +; CHECK-NEXT: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 [[IDX:%.*]] |
| 27 | +; CHECK-NEXT: ret float [[EXTRACT]] |
| 28 | +; |
| 29 | + %inv = load <4 x float>, ptr %in |
| 30 | + %invt = call <4 x float> @llvm.matrix.transpose(<4 x float> %inv, i32 2, i32 2) |
| 31 | + %invtt = call <4 x float> @llvm.matrix.transpose(<4 x float> %invt, i32 2, i32 2) |
| 32 | + %extract = extractelement <4 x float> %invtt, i32 %idx |
| 33 | + ret float %extract |
| 34 | +} |
0 commit comments