@@ -307,11 +307,85 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
   ret <4 x i64> %ext
 }
 
+define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
+; CHECK-LABEL: extract_v4f16_nxv2f16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    addpl x9, sp, #6
+; CHECK-NEXT:    subs x8, x8, #4
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    csel x8, x8, xzr, lo
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    ldr d0, [x9, x8]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
+  ret <4 x half> %ext
+}
+
+define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
+; CHECK-LABEL: extract_v4f16_nxv2f16_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov w9, #4 // =0x4
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    subs x8, x8, #4
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    st1h { z0.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    cmp x8, #4
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    addpl x9, sp, #6
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    ldr d0, [x9, x8]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
+  ret <4 x half> %ext
+}
+
+define <2 x half> @extract_v2f16_nxv4f16_2(<vscale x 4 x half> %arg) {
+; CHECK-LABEL: extract_v2f16_nxv4f16_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, z0.s[3]
+; CHECK-NEXT:    mov z0.s, z0.s[2]
+; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+  %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 2)
+  ret <2 x half> %ext
+}
+
+define <2 x half> @extract_v2f16_nxv4f16_6(<vscale x 4 x half> %arg) {
+; CHECK-LABEL: extract_v2f16_nxv4f16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, z0.s[7]
+; CHECK-NEXT:    mov z0.s, z0.s[6]
+; CHECK-NEXT:    mov v0.h[1], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT:    ret
+  %ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 6)
+  ret <2 x half> %ext
+}
 
-declare <2 x i64> @llvm.vector.extract.v2i64.nxv8i64(<vscale x 8 x i64>, i64)
-declare <4 x i64> @llvm.vector.extract.v4i64.nxv8i64(<vscale x 8 x i64>, i64)
 declare <4 x float> @llvm.vector.extract.v4f32.nxv16f32(<vscale x 16 x float>, i64)
 declare <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float>, i64)
+declare <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half>, i64);
+declare <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half>, i64);
+declare <2 x i64> @llvm.vector.extract.v2i64.nxv8i64(<vscale x 8 x i64>, i64)
+declare <4 x i64> @llvm.vector.extract.v4i64.nxv8i64(<vscale x 8 x i64>, i64)
 declare <4 x i32> @llvm.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32>, i64)
 declare <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32>, i64)
 declare <8 x i16> @llvm.vector.extract.v8i16.nxv32i16(<vscale x 32 x i16>, i64)