1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2
2
; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
3
- ; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1
3
+ ; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
4
+ ; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
4
5
target triple = "aarch64-linux"
5
6
6
7
; Test combining of getActiveLaneMask with a pair of extract_vector operations.
@@ -13,12 +14,12 @@ define void @test_2x8bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #0
13
14
; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
14
15
; CHECK-SVE-NEXT: b use
15
16
;
16
- ; CHECK-SVE2p1-LABEL: test_2x8bit_mask_with_32bit_index_and_trip_count:
17
- ; CHECK-SVE2p1: // %bb.0:
18
- ; CHECK-SVE2p1-NEXT: mov w8, w1
19
- ; CHECK-SVE2p1-NEXT: mov w9, w0
20
- ; CHECK-SVE2p1-NEXT: whilelo { p0.h, p1.h }, x9, x8
21
- ; CHECK-SVE2p1-NEXT: b use
17
+ ; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_32bit_index_and_trip_count:
18
+ ; CHECK-SVE2p1-SME2: // %bb.0:
19
+ ; CHECK-SVE2p1-SME2-NEXT: mov w8, w1
20
+ ; CHECK-SVE2p1-SME2-NEXT: mov w9, w0
21
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.h, p1.h }, x9, x8
22
+ ; CHECK-SVE2p1-SME2-NEXT: b use
22
23
%r = call <vscale x 16 x i1 > @llvm.get.active.lane.mask.nxv16i1.i32 (i32 %i , i32 %n )
23
24
%v0 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 0 )
24
25
%v1 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 8 )
@@ -34,10 +35,10 @@ define void @test_2x8bit_mask_with_64bit_index_and_trip_count(i64 %i, i64 %n) #0
34
35
; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
35
36
; CHECK-SVE-NEXT: b use
36
37
;
37
- ; CHECK-SVE2p1-LABEL: test_2x8bit_mask_with_64bit_index_and_trip_count:
38
- ; CHECK-SVE2p1: // %bb.0:
39
- ; CHECK-SVE2p1-NEXT: whilelo { p0.h, p1.h }, x0, x1
40
- ; CHECK-SVE2p1-NEXT: b use
38
+ ; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_64bit_index_and_trip_count:
39
+ ; CHECK-SVE2p1-SME2: // %bb.0:
40
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.h, p1.h }, x0, x1
41
+ ; CHECK-SVE2p1-SME2-NEXT: b use
41
42
%r = call <vscale x 16 x i1 > @llvm.get.active.lane.mask.nxv16i1.i64 (i64 %i , i64 %n )
42
43
%v0 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 0 )
43
44
%v1 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 8 )
@@ -53,12 +54,12 @@ define void @test_edge_case_2x1bit_mask(i64 %i, i64 %n) #0 {
53
54
; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
54
55
; CHECK-SVE-NEXT: b use
55
56
;
56
- ; CHECK-SVE2p1-LABEL: test_edge_case_2x1bit_mask:
57
- ; CHECK-SVE2p1: // %bb.0:
58
- ; CHECK-SVE2p1-NEXT: whilelo p1.d, x0, x1
59
- ; CHECK-SVE2p1-NEXT: punpklo p0.h, p1.b
60
- ; CHECK-SVE2p1-NEXT: punpkhi p1.h, p1.b
61
- ; CHECK-SVE2p1-NEXT: b use
57
+ ; CHECK-SVE2p1-SME2-LABEL: test_edge_case_2x1bit_mask:
58
+ ; CHECK-SVE2p1-SME2: // %bb.0:
59
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo p1.d, x0, x1
60
+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p0.h, p1.b
61
+ ; CHECK-SVE2p1-SME2-NEXT: punpkhi p1.h, p1.b
62
+ ; CHECK-SVE2p1-SME2-NEXT: b use
62
63
%r = call <vscale x 2 x i1 > @llvm.get.active.lane.mask.nxv2i1.i64 (i64 %i , i64 %n )
63
64
%v0 = call <vscale x 1 x i1 > @llvm.vector.extract.nxv1i1.nxv2i1.i64 (<vscale x 2 x i1 > %r , i64 0 )
64
65
%v1 = call <vscale x 1 x i1 > @llvm.vector.extract.nxv1i1.nxv2i1.i64 (<vscale x 2 x i1 > %r , i64 1 )
@@ -74,10 +75,10 @@ define void @test_boring_case_2x2bit_mask(i64 %i, i64 %n) #0 {
74
75
; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
75
76
; CHECK-SVE-NEXT: b use
76
77
;
77
- ; CHECK-SVE2p1-LABEL: test_boring_case_2x2bit_mask:
78
- ; CHECK-SVE2p1: // %bb.0:
79
- ; CHECK-SVE2p1-NEXT: whilelo { p0.d, p1.d }, x0, x1
80
- ; CHECK-SVE2p1-NEXT: b use
78
+ ; CHECK-SVE2p1-SME2-LABEL: test_boring_case_2x2bit_mask:
79
+ ; CHECK-SVE2p1-SME2: // %bb.0:
80
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.d, p1.d }, x0, x1
81
+ ; CHECK-SVE2p1-SME2-NEXT: b use
81
82
%r = call <vscale x 4 x i1 > @llvm.get.active.lane.mask.nxv4i1.i64 (i64 %i , i64 %n )
82
83
%v0 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv4i1.i64 (<vscale x 4 x i1 > %r , i64 0 )
83
84
%v1 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv4i1.i64 (<vscale x 4 x i1 > %r , i64 2 )
@@ -96,22 +97,22 @@ define void @test_partial_extract(i64 %i, i64 %n) #0 {
96
97
; CHECK-SVE-NEXT: punpklo p1.h, p2.b
97
98
; CHECK-SVE-NEXT: b use
98
99
;
99
- ; CHECK-SVE2p1-LABEL: test_partial_extract:
100
- ; CHECK-SVE2p1: // %bb.0:
101
- ; CHECK-SVE2p1-NEXT: whilelo p0.h, x0, x1
102
- ; CHECK-SVE2p1-NEXT: punpklo p1.h, p0.b
103
- ; CHECK-SVE2p1-NEXT: punpkhi p2.h, p0.b
104
- ; CHECK-SVE2p1-NEXT: punpklo p0.h, p1.b
105
- ; CHECK-SVE2p1-NEXT: punpklo p1.h, p2.b
106
- ; CHECK-SVE2p1-NEXT: b use
100
+ ; CHECK-SVE2p1-SME2-LABEL: test_partial_extract:
101
+ ; CHECK-SVE2p1-SME2: // %bb.0:
102
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo p0.h, x0, x1
103
+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p1.h, p0.b
104
+ ; CHECK-SVE2p1-SME2-NEXT: punpkhi p2.h, p0.b
105
+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p0.h, p1.b
106
+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p1.h, p2.b
107
+ ; CHECK-SVE2p1-SME2-NEXT: b use
107
108
%r = call <vscale x 8 x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64 %i , i64 %n )
108
109
%v0 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 0 )
109
110
%v1 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 4 )
110
111
tail call void @use (<vscale x 2 x i1 > %v0 , <vscale x 2 x i1 > %v1 )
111
112
ret void
112
113
}
113
114
114
- ;; Negative test for when extracting a fixed-length vector.
115
+ ; Negative test for when extracting a fixed-length vector.
115
116
define void @test_fixed_extract (i64 %i , i64 %n ) #0 {
116
117
; CHECK-SVE-LABEL: test_fixed_extract:
117
118
; CHECK-SVE: // %bb.0:
@@ -144,13 +145,89 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
144
145
; CHECK-SVE2p1-NEXT: mov v1.s[1], w11
145
146
; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $q1
146
147
; CHECK-SVE2p1-NEXT: b use
148
+ ;
149
+ ; CHECK-SME2-LABEL: test_fixed_extract:
150
+ ; CHECK-SME2: // %bb.0:
151
+ ; CHECK-SME2-NEXT: whilelo p0.h, x0, x1
152
+ ; CHECK-SME2-NEXT: cset w8, mi
153
+ ; CHECK-SME2-NEXT: mov z0.h, p0/z, #1 // =0x1
154
+ ; CHECK-SME2-NEXT: mov z1.h, z0.h[1]
155
+ ; CHECK-SME2-NEXT: mov z2.h, z0.h[5]
156
+ ; CHECK-SME2-NEXT: mov z3.h, z0.h[4]
157
+ ; CHECK-SME2-NEXT: fmov s0, w8
158
+ ; CHECK-SME2-NEXT: zip1 z0.s, z0.s, z1.s
159
+ ; CHECK-SME2-NEXT: zip1 z1.s, z3.s, z2.s
160
+ ; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
161
+ ; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
162
+ ; CHECK-SME2-NEXT: b use
147
163
%r = call <vscale x 8 x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64 %i , i64 %n )
148
164
%v0 = call <2 x i1 > @llvm.vector.extract.v2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 0 )
149
165
%v1 = call <2 x i1 > @llvm.vector.extract.v2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 4 )
150
166
tail call void @use (<2 x i1 > %v0 , <2 x i1 > %v1 )
151
167
ret void
152
168
}
153
169
170
+ ; Illegal Types
171
+
172
+ define void @test_2x16bit_mask_with_32bit_index_and_trip_count (i32 %i , i32 %n ) #0 {
173
+ ; CHECK-SVE-LABEL: test_2x16bit_mask_with_32bit_index_and_trip_count:
174
+ ; CHECK-SVE: // %bb.0:
175
+ ; CHECK-SVE-NEXT: rdvl x8, #1
176
+ ; CHECK-SVE-NEXT: adds w8, w0, w8
177
+ ; CHECK-SVE-NEXT: csinv w8, w8, wzr, lo
178
+ ; CHECK-SVE-NEXT: whilelo p0.b, w0, w1
179
+ ; CHECK-SVE-NEXT: whilelo p1.b, w8, w1
180
+ ; CHECK-SVE-NEXT: b use
181
+ ;
182
+ ; CHECK-SVE2p1-SME2-LABEL: test_2x16bit_mask_with_32bit_index_and_trip_count:
183
+ ; CHECK-SVE2p1-SME2: // %bb.0:
184
+ ; CHECK-SVE2p1-SME2-NEXT: mov w8, w1
185
+ ; CHECK-SVE2p1-SME2-NEXT: mov w9, w0
186
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.b, p1.b }, x9, x8
187
+ ; CHECK-SVE2p1-SME2-NEXT: b use
188
+ %r = call <vscale x 32 x i1 > @llvm.get.active.lane.mask.nxv32i1.i32 (i32 %i , i32 %n )
189
+ %v0 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv32i1.i64 (<vscale x 32 x i1 > %r , i64 0 )
190
+ %v1 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv32i1.i64 (<vscale x 32 x i1 > %r , i64 16 )
191
+ tail call void @use (<vscale x 16 x i1 > %v0 , <vscale x 16 x i1 > %v1 )
192
+ ret void
193
+ }
194
+
195
+ define void @test_2x32bit_mask_with_32bit_index_and_trip_count (i32 %i , i32 %n ) #0 {
196
+ ; CHECK-SVE-LABEL: test_2x32bit_mask_with_32bit_index_and_trip_count:
197
+ ; CHECK-SVE: // %bb.0:
198
+ ; CHECK-SVE-NEXT: rdvl x8, #2
199
+ ; CHECK-SVE-NEXT: rdvl x9, #1
200
+ ; CHECK-SVE-NEXT: adds w8, w0, w8
201
+ ; CHECK-SVE-NEXT: csinv w8, w8, wzr, lo
202
+ ; CHECK-SVE-NEXT: adds w10, w8, w9
203
+ ; CHECK-SVE-NEXT: csinv w10, w10, wzr, lo
204
+ ; CHECK-SVE-NEXT: whilelo p3.b, w10, w1
205
+ ; CHECK-SVE-NEXT: adds w9, w0, w9
206
+ ; CHECK-SVE-NEXT: csinv w9, w9, wzr, lo
207
+ ; CHECK-SVE-NEXT: whilelo p0.b, w0, w1
208
+ ; CHECK-SVE-NEXT: whilelo p1.b, w9, w1
209
+ ; CHECK-SVE-NEXT: whilelo p2.b, w8, w1
210
+ ; CHECK-SVE-NEXT: b use
211
+ ;
212
+ ; CHECK-SVE2p1-SME2-LABEL: test_2x32bit_mask_with_32bit_index_and_trip_count:
213
+ ; CHECK-SVE2p1-SME2: // %bb.0:
214
+ ; CHECK-SVE2p1-SME2-NEXT: rdvl x8, #2
215
+ ; CHECK-SVE2p1-SME2-NEXT: mov w9, w1
216
+ ; CHECK-SVE2p1-SME2-NEXT: mov w10, w0
217
+ ; CHECK-SVE2p1-SME2-NEXT: adds w8, w0, w8
218
+ ; CHECK-SVE2p1-SME2-NEXT: csinv w8, w8, wzr, lo
219
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.b, p1.b }, x10, x9
220
+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p2.b, p3.b }, x8, x9
221
+ ; CHECK-SVE2p1-SME2-NEXT: b use
222
+ %r = call <vscale x 64 x i1 > @llvm.get.active.lane.mask.nxv64i1.i32 (i32 %i , i32 %n )
223
+ %v0 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 0 )
224
+ %v1 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 16 )
225
+ %v2 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 32 )
226
+ %v3 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 48 )
227
+ tail call void @use (<vscale x 16 x i1 > %v0 , <vscale x 16 x i1 > %v1 , <vscale x 16 x i1 > %v2 , <vscale x 16 x i1 > %v3 )
228
+ ret void
229
+ }
230
+
154
231
declare void @use (...)
155
232
156
233
attributes #0 = { nounwind }
0 commit comments