Skip to content

Commit c84c74e

Browse files
[LLVM][CodeGen][SVE] Add tests for vector extracts from unpacked types.
1 parent db4cf7c commit c84c74e

File tree

1 file changed

+76
-2
lines changed

1 file changed

+76
-2
lines changed

llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll

Lines changed: 76 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -307,11 +307,85 @@ define <4 x i64> @extract_v4i64_nxv8i64_0(<vscale x 8 x i64> %arg) {
307307
ret <4 x i64> %ext
308308
}
309309

310+
; Extract a fixed <4 x half> at index 0 from an unpacked <vscale x 2 x half>.
; The result type has more elements (4) than the source type's minimum element
; count (2). The expected code stores z0 to a stack slot (st1h) and reloads d0
; from that slot at an element offset derived from cntd via subs/csel.
define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
311+
; CHECK-LABEL: extract_v4f16_nxv2f16_0:
312+
; CHECK: // %bb.0:
313+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
314+
; CHECK-NEXT: addvl sp, sp, #-1
315+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
316+
; CHECK-NEXT: .cfi_offset w29, -16
317+
; CHECK-NEXT: cntd x8
318+
; CHECK-NEXT: ptrue p0.d
319+
; CHECK-NEXT: addpl x9, sp, #6
320+
; CHECK-NEXT: subs x8, x8, #4
321+
; CHECK-NEXT: csel x8, xzr, x8, lo
322+
; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
323+
; CHECK-NEXT: cmp x8, #0
324+
; CHECK-NEXT: csel x8, x8, xzr, lo
325+
; CHECK-NEXT: lsl x8, x8, #1
326+
; CHECK-NEXT: ldr d0, [x9, x8]
327+
; CHECK-NEXT: addvl sp, sp, #1
328+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
329+
; CHECK-NEXT: ret
330+
%ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 0)
331+
ret <4 x half> %ext
332+
}
333+
334+
; Extract a fixed <4 x half> at index 4 from an unpacked <vscale x 2 x half>.
; As with the index-0 variant, the expected code stores z0 to a stack slot
; (st1h) and reloads d0 from it; here the offset derived from cntd is compared
; against the requested index 4 (cmp/csel) before being scaled by lsl #1.
define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
335+
; CHECK-LABEL: extract_v4f16_nxv2f16_4:
336+
; CHECK: // %bb.0:
337+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
338+
; CHECK-NEXT: addvl sp, sp, #-1
339+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
340+
; CHECK-NEXT: .cfi_offset w29, -16
341+
; CHECK-NEXT: cntd x8
342+
; CHECK-NEXT: mov w9, #4 // =0x4
343+
; CHECK-NEXT: ptrue p0.d
344+
; CHECK-NEXT: subs x8, x8, #4
345+
; CHECK-NEXT: csel x8, xzr, x8, lo
346+
; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
347+
; CHECK-NEXT: cmp x8, #4
348+
; CHECK-NEXT: csel x8, x8, x9, lo
349+
; CHECK-NEXT: addpl x9, sp, #6
350+
; CHECK-NEXT: lsl x8, x8, #1
351+
; CHECK-NEXT: ldr d0, [x9, x8]
352+
; CHECK-NEXT: addvl sp, sp, #1
353+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
354+
; CHECK-NEXT: ret
355+
%ext = call <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half> %arg, i64 4)
356+
ret <4 x half> %ext
357+
}
358+
359+
; Extract a fixed <2 x half> at index 2 from an unpacked <vscale x 4 x half>.
; The expected code uses no stack slot: it selects .s lanes 3 and 2 of z0 with
; indexed moves and combines them with a lane insert (mov v0.h[1], v1.h[0]).
define <2 x half> @extract_v2f16_nxv4f16_2(<vscale x 4 x half> %arg) {
360+
; CHECK-LABEL: extract_v2f16_nxv4f16_2:
361+
; CHECK: // %bb.0:
362+
; CHECK-NEXT: mov z1.s, z0.s[3]
363+
; CHECK-NEXT: mov z0.s, z0.s[2]
364+
; CHECK-NEXT: mov v0.h[1], v1.h[0]
365+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
366+
; CHECK-NEXT: ret
367+
%ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 2)
368+
ret <2 x half> %ext
369+
}
370+
371+
; Extract a fixed <2 x half> at index 6 from an unpacked <vscale x 4 x half>.
; The expected code selects .s lanes 7 and 6 of z0 with indexed moves and
; combines them with a lane insert (mov v0.h[1], v1.h[0]).
; NOTE(review): index 6 is beyond the source type's minimum of 4 elements;
; confirm the intended semantics for small vscale against the LangRef.
define <2 x half> @extract_v2f16_nxv4f16_6(<vscale x 4 x half> %arg) {
372+
; CHECK-LABEL: extract_v2f16_nxv4f16_6:
373+
; CHECK: // %bb.0:
374+
; CHECK-NEXT: mov z1.s, z0.s[7]
375+
; CHECK-NEXT: mov z0.s, z0.s[6]
376+
; CHECK-NEXT: mov v0.h[1], v1.h[0]
377+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
378+
; CHECK-NEXT: ret
379+
%ext = call <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half> %arg, i64 6)
380+
ret <2 x half> %ext
381+
}
310382

311-
declare <2 x i64> @llvm.vector.extract.v2i64.nxv8i64(<vscale x 8 x i64>, i64)
312-
declare <4 x i64> @llvm.vector.extract.v4i64.nxv8i64(<vscale x 8 x i64>, i64)
313383
; Declarations of the llvm.vector.extract overloads; the two half-precision
; ones are called by the f16 extract tests.
declare <4 x float> @llvm.vector.extract.v4f32.nxv16f32(<vscale x 16 x float>, i64)
314384
declare <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float>, i64)
385+
declare <4 x half> @llvm.vector.extract.v4f16.nxv2f16(<vscale x 2 x half>, i64)
386+
declare <2 x half> @llvm.vector.extract.v2f16.nxv4f16(<vscale x 4 x half>, i64)
387+
declare <2 x i64> @llvm.vector.extract.v2i64.nxv8i64(<vscale x 8 x i64>, i64)
388+
declare <4 x i64> @llvm.vector.extract.v4i64.nxv8i64(<vscale x 8 x i64>, i64)
315389
declare <4 x i32> @llvm.vector.extract.v4i32.nxv16i32(<vscale x 16 x i32>, i64)
316390
declare <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32>, i64)
317391
declare <8 x i16> @llvm.vector.extract.v8i16.nxv32i16(<vscale x 32 x i16>, i64)

0 commit comments

Comments
 (0)