Skip to content

Commit aa70d84

Browse files
author
Thorsten Schütt
authored
[GlobalISel][AArch64] Legalize G_SPLAT_VECTOR (#114006)
{nxv8s16, s16} fails to select; {nxv16s8, s8} has no patterns available.
1 parent 083a5cd commit aa70d84

File tree

3 files changed

+78
-0
lines changed

3 files changed

+78
-0
lines changed

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ def : GINodeEquiv<G_INSERT_VECTOR_ELT, vector_insert>;
148148
def : GINodeEquiv<G_CONCAT_VECTORS, concat_vectors>;
149149
def : GINodeEquiv<G_BUILD_VECTOR, build_vector>;
150150
def : GINodeEquiv<G_EXTRACT_SUBVECTOR, extract_subvector>;
151+
def : GINodeEquiv<G_SPLAT_VECTOR, splat_vector>;
151152
def : GINodeEquiv<G_FCEIL, fceil>;
152153
def : GINodeEquiv<G_FCOS, fcos>;
153154
def : GINodeEquiv<G_FSIN, fsin>;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
13161316
.widenScalarOrEltToNextPow2(0)
13171317
.immIdx(0); // Inform verifier imm idx 0 is handled.
13181318

1319+
// TODO: Handle {nxv16s8, s8} and {nxv8s16, s16}.
1320+
getActionDefinitionsBuilder(G_SPLAT_VECTOR)
1321+
.legalFor(HasSVE, {{nxv4s32, s32}, {nxv2s64, s64}});
1322+
13191323
getLegacyLegalizerInfo().computeTables();
13201324
verify(*ST.getInstrInfo());
13211325
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s --check-prefixes=CHECK,CHECK-SDAG
3+
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GS
4+
5+
; REQUIRES: asserts, aarch64-registered-target
6+
7+
;; add
8+
define <vscale x 2 x i64> @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
9+
; CHECK-SDAG-LABEL: addnxv2i64:
10+
; CHECK-SDAG: // %bb.0: // %entry
11+
; CHECK-SDAG-NEXT: add z0.d, z0.d, #9 // =0x9
12+
; CHECK-SDAG-NEXT: ret
13+
;
14+
; CHECK-GS-LABEL: addnxv2i64:
15+
; CHECK-GS: // %bb.0: // %entry
16+
; CHECK-GS-NEXT: mov w8, #9 // =0x9
17+
; CHECK-GS-NEXT: mov z1.d, x8
18+
; CHECK-GS-NEXT: add z0.d, z0.d, z1.d
19+
; CHECK-GS-NEXT: ret
20+
entry:
21+
%c = add <vscale x 2 x i64> %a, splat (i64 9)
22+
ret <vscale x 2 x i64> %c
23+
}
24+
25+
define <vscale x 2 x i64> @splarnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
26+
; CHECK-SDAG-LABEL: splarnxv2i64:
27+
; CHECK-SDAG: // %bb.0: // %entry
28+
; CHECK-SDAG-NEXT: mov z0.d, #9 // =0x9
29+
; CHECK-SDAG-NEXT: ret
30+
;
31+
; CHECK-GS-LABEL: splarnxv2i64:
32+
; CHECK-GS: // %bb.0: // %entry
33+
; CHECK-GS-NEXT: mov w8, #9 // =0x9
34+
; CHECK-GS-NEXT: mov z0.d, x8
35+
; CHECK-GS-NEXT: ret
36+
entry:
37+
ret <vscale x 2 x i64> splat (i64 9)
38+
}
39+
40+
define <vscale x 4 x i32> @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
41+
; CHECK-SDAG-LABEL: addnxv4i32:
42+
; CHECK-SDAG: // %bb.0: // %entry
43+
; CHECK-SDAG-NEXT: add z0.s, z0.s, #9 // =0x9
44+
; CHECK-SDAG-NEXT: ret
45+
;
46+
; CHECK-GS-LABEL: addnxv4i32:
47+
; CHECK-GS: // %bb.0: // %entry
48+
; CHECK-GS-NEXT: mov w8, #9 // =0x9
49+
; CHECK-GS-NEXT: mov z1.s, w8
50+
; CHECK-GS-NEXT: add z0.s, z0.s, z1.s
51+
; CHECK-GS-NEXT: ret
52+
entry:
53+
%c = add <vscale x 4 x i32> %a, splat (i32 9)
54+
ret <vscale x 4 x i32> %c
55+
}
56+
57+
define <vscale x 4 x i32> @splatnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
58+
; CHECK-SDAG-LABEL: splatnxv4i32:
59+
; CHECK-SDAG: // %bb.0: // %entry
60+
; CHECK-SDAG-NEXT: mov z0.s, #9 // =0x9
61+
; CHECK-SDAG-NEXT: ret
62+
;
63+
; CHECK-GS-LABEL: splatnxv4i32:
64+
; CHECK-GS: // %bb.0: // %entry
65+
; CHECK-GS-NEXT: mov w8, #9 // =0x9
66+
; CHECK-GS-NEXT: mov z0.s, w8
67+
; CHECK-GS-NEXT: ret
68+
entry:
69+
ret <vscale x 4 x i32> splat (i32 9)
70+
}
71+
72+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
73+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)