@@ -201,9 +201,92 @@ define void @scatter_i8_index_stride_too_big(i8* %base, i64 %offset, <vscale x 4
ret void
}
+ ; Ensure the resulting load is "vscale x 4" wide, despite the offset giving the
+ ; impression the gather must be split due to its <vscale x 4 x i64> offset.
+ ; gather_i8(base, index(offset, 8 * sizeof(i8)))
+ define <vscale x 4 x i8> @gather_8i8_index_offset_8([8 x i8]* %base, i64 %offset, <vscale x 4 x i1> %pg) #0 {
+ ; CHECK-LABEL: gather_8i8_index_offset_8:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: add x8, x0, x1, lsl #3
+ ; CHECK-NEXT: index z0.s, #0, #8
+ ; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x8, z0.s, sxtw]
+ ; CHECK-NEXT: ret
+ %t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
+ %t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %step = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+ %t2 = add <vscale x 4 x i64> %t1, %step
+ %t3 = getelementptr [8 x i8], [8 x i8]* %base, <vscale x 4 x i64> %t2
+ %t4 = bitcast <vscale x 4 x [8 x i8]*> %t3 to <vscale x 4 x i8*>
+ %load = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*> %t4, i32 4, <vscale x 4 x i1> %pg, <vscale x 4 x i8> undef)
+ ret <vscale x 4 x i8> %load
+ }
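+ ; In the expected code above, the splatted offset is folded into the scalar
+ ; base (x0 + x1*8) and the per-element stride comes from INDEX as a 32-bit
+ ; vector, so a single ld1sb with sxtw-extended offsets covers every lane and
+ ; the gather is not split.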
+
+ ; Ensure the resulting load is "vscale x 4" wide, despite the offset giving the
+ ; impression the gather must be split due to its <vscale x 4 x i64> offset.
+ ; gather_f32(base, index(offset, 8 * sizeof(float)))
+ define <vscale x 4 x float> @gather_f32_index_offset_8([8 x float]* %base, i64 %offset, <vscale x 4 x i1> %pg) #0 {
+ ; CHECK-LABEL: gather_f32_index_offset_8:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: mov w8, #32
+ ; CHECK-NEXT: add x9, x0, x1, lsl #5
+ ; CHECK-NEXT: index z0.s, #0, w8
+ ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, z0.s, sxtw]
+ ; CHECK-NEXT: ret
+ %t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
+ %t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %step = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+ %t2 = add <vscale x 4 x i64> %t1, %step
+ %t3 = getelementptr [8 x float], [8 x float]* %base, <vscale x 4 x i64> %t2
+ %t4 = bitcast <vscale x 4 x [8 x float]*> %t3 to <vscale x 4 x float*>
+ %load = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*> %t4, i32 4, <vscale x 4 x i1> %pg, <vscale x 4 x float> undef)
+ ret <vscale x 4 x float> %load
+ }
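+ ; Same pattern as above, except the 32-byte stride (8 * sizeof(float)) is first
+ ; materialized in w8, presumably because it lies outside the range of INDEX's
+ ; immediate form; the gather itself is still a single ld1w.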
+
+ ; Ensure the resulting store is "vscale x 4" wide, despite the offset giving the
+ ; impression the scatter must be split due to its <vscale x 4 x i64> offset.
+ ; scatter_i8(base, index(offset, 8 * sizeof(i8)))
+ define void @scatter_i8_index_offset_8([8 x i8]* %base, i64 %offset, <vscale x 4 x i1> %pg, <vscale x 4 x i8> %data) #0 {
+ ; CHECK-LABEL: scatter_i8_index_offset_8:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: add x8, x0, x1, lsl #3
+ ; CHECK-NEXT: index z1.s, #0, #8
+ ; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, sxtw]
+ ; CHECK-NEXT: ret
+ %t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
+ %t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %step = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+ %t2 = add <vscale x 4 x i64> %t1, %step
+ %t3 = getelementptr [8 x i8], [8 x i8]* %base, <vscale x 4 x i64> %t2
+ %t4 = bitcast <vscale x 4 x [8 x i8]*> %t3 to <vscale x 4 x i8*>
+ call void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8> %data, <vscale x 4 x i8*> %t4, i32 2, <vscale x 4 x i1> %pg)
+ ret void
+ }
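+ ; The store side mirrors the gather lowering: the data stays in z0.s, the
+ ; generated byte offsets land in z1.s, and one st1b with sxtw-extended offsets
+ ; performs the whole scatter.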
+
+ ; Ensure the resulting store is "vscale x 4" wide, despite the offset giving the
+ ; impression the scatter must be split due to its <vscale x 4 x i64> offset.
+ ; scatter_f16(base, index(offset, 8 * sizeof(half)))
+ define void @scatter_f16_index_offset_8([8 x half]* %base, i64 %offset, <vscale x 4 x i1> %pg, <vscale x 4 x half> %data) #0 {
+ ; CHECK-LABEL: scatter_f16_index_offset_8:
+ ; CHECK: // %bb.0:
+ ; CHECK-NEXT: mov w8, #16
+ ; CHECK-NEXT: add x9, x0, x1, lsl #4
+ ; CHECK-NEXT: index z1.s, #0, w8
+ ; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw]
+ ; CHECK-NEXT: ret
+ %t0 = insertelement <vscale x 4 x i64> undef, i64 %offset, i32 0
+ %t1 = shufflevector <vscale x 4 x i64> %t0, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
+ %step = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+ %t2 = add <vscale x 4 x i64> %t1, %step
+ %t3 = getelementptr [8 x half], [8 x half]* %base, <vscale x 4 x i64> %t2
+ %t4 = bitcast <vscale x 4 x [8 x half]*> %t3 to <vscale x 4 x half*>
+ call void @llvm.masked.scatter.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half*> %t4, i32 2, <vscale x 4 x i1> %pg)
+ ret void
+ }
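+ ; As with the f32 gather, the 16-byte stride (8 * sizeof(half)) is moved into
+ ; w8 before INDEX, and the scatter remains a single st1h.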
+
attributes #0 = { "target-features"="+sve" vscale_range(1,16) }
+ declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x float*>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8(<vscale x 4 x i8*>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare void @llvm.masked.scatter.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8*>, i32, <vscale x 4 x i1>)