Skip to content

[AArch64][GlobalISel] Prefer DUPLANE to REV #142725

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ def form_duplane : GICombineRule <
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
>;

-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
-form_duplane, shuf_to_ins]>;
+def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
+uzp, trn, fullrev, shuf_to_ins]>;

// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
def vector_unmerge_lowering : GICombineRule <
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,8 @@ body: |
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
-; CHECK-NEXT: %shuf:_(<8 x s16>) = G_REV64 %v1
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+; CHECK-NEXT: %shuf:_(<8 x s16>) = G_DUPLANE16 %v1, [[C]](s64)
; CHECK-NEXT: $q0 = COPY %shuf(<8 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<8 x s16>) = COPY $q0
Expand Down Expand Up @@ -298,8 +299,8 @@ body: |
; CHECK: liveins: $q0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
-; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
-; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%v1:_(<2 x s64>) = COPY $q0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@ body: |
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-; CHECK-NEXT: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
-; CHECK-NEXT: $d0 = COPY [[REV64_]](<2 x s32>)
+; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
+; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[CONCAT_VECTORS]], [[C]](s64)
+; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s32>) = COPY $d1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
+; CHECK-GI-NEXT: dup v1.2s, v0.s[1]
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
Expand Down
54 changes: 18 additions & 36 deletions llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
Original file line number Diff line number Diff line change
Expand Up @@ -657,23 +657,14 @@ entry:
}

define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlahh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
entry:
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
Expand Down Expand Up @@ -719,7 +710,7 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
;
; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s2, w1
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
Expand Down Expand Up @@ -837,23 +828,14 @@ entry:
}

define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlshh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
entry:
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
Expand Down Expand Up @@ -899,7 +881,7 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
;
; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
; CHECK-GI-NEXT: fmov s1, w0
; CHECK-GI-NEXT: fmov s2, w1
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
Expand All @@ -35,7 +35,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
; CHECK-GI-NEXT: fmov s1, w8
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
; CHECK-GI-NEXT: ret
%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
; Can't optimize the following bitcast to scalar_to_vector.
Expand Down
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/AArch64/dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1025,13 +1025,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
;
; CHECK-GI-LABEL: duplane0_v3i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
Expand Down Expand Up @@ -2354,13 +2353,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
;
; CHECK-GI-LABEL: duplane0_v3double:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: fmov d2, d0
; CHECK-GI-NEXT: ret
entry:
%c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
Expand Down
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AArch64/shufflevector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,8 @@ define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
; CHECK-GI-NEXT: fmov x9, d4
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT: fmov d2, d5
+; CHECK-GI-NEXT: // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT: dup v2.2d, v5.d[0]
; CHECK-GI-NEXT: mov v0.d[1], x8
; CHECK-GI-NEXT: mov v3.d[1], x9
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v3.16b, #8
Expand Down
Loading