Skip to content

Commit 84f0b56

Browse files
committed
[AArch64][GlobalISel] Prefer DUPLANE to REV
Some shuffles containing undefs can match multiple instructions; for example, the mask <3,u,u,u> can be either a duplane or a rev. This changes the order in which the different shuffles are considered, so that duplane is preferred, as it is simpler and more likely to lead to further combines.
1 parent 03f616e commit 84f0b56

File tree

8 files changed

+42
-57
lines changed

8 files changed

+42
-57
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,8 @@ def form_duplane : GICombineRule <
172172
(apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
173173
>;
174174

175-
def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
176-
form_duplane, shuf_to_ins]>;
175+
def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
176+
uzp, trn, fullrev, shuf_to_ins]>;
177177

178178
// Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
179179
def vector_unmerge_lowering : GICombineRule <

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,8 @@ body: |
255255
; CHECK: liveins: $q0, $q1
256256
; CHECK-NEXT: {{ $}}
257257
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
258-
; CHECK-NEXT: %shuf:_(<8 x s16>) = G_REV64 %v1
258+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
259+
; CHECK-NEXT: %shuf:_(<8 x s16>) = G_DUPLANE16 %v1, [[C]](s64)
259260
; CHECK-NEXT: $q0 = COPY %shuf(<8 x s16>)
260261
; CHECK-NEXT: RET_ReallyLR implicit $q0
261262
%v1:_(<8 x s16>) = COPY $q0
@@ -298,8 +299,8 @@ body: |
298299
; CHECK: liveins: $q0
299300
; CHECK-NEXT: {{ $}}
300301
; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
301-
; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
302-
; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
302+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
303+
; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
303304
; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
304305
; CHECK-NEXT: RET_ReallyLR implicit $q0
305306
%v1:_(<2 x s64>) = COPY $q0

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,11 @@ body: |
3838
; CHECK: liveins: $d0, $d1
3939
; CHECK-NEXT: {{ $}}
4040
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
41-
; CHECK-NEXT: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
42-
; CHECK-NEXT: $d0 = COPY [[REV64_]](<2 x s32>)
41+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
42+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
43+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
44+
; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[CONCAT_VECTORS]], [[C]](s64)
45+
; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
4346
; CHECK-NEXT: RET_ReallyLR implicit $d0
4447
%0:_(<2 x s32>) = COPY $d0
4548
%1:_(<2 x s32>) = COPY $d1

llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
139139
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
140140
; CHECK-GI-NEXT: mov d1, v0.d[1]
141141
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
142-
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
142+
; CHECK-GI-NEXT: dup v1.2s, v0.s[1]
143143
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
144144
; CHECK-GI-NEXT: fmov w0, s0
145145
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll

Lines changed: 18 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -657,23 +657,14 @@ entry:
657657
}
658658

659659
define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
660-
; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
661-
; CHECK-SD: // %bb.0: // %entry
662-
; CHECK-SD-NEXT: fmov s1, w0
663-
; CHECK-SD-NEXT: fmov s2, w1
664-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
665-
; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
666-
; CHECK-SD-NEXT: umov w0, v1.h[0]
667-
; CHECK-SD-NEXT: ret
668-
;
669-
; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
670-
; CHECK-GI: // %bb.0: // %entry
671-
; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
672-
; CHECK-GI-NEXT: fmov s1, w0
673-
; CHECK-GI-NEXT: fmov s2, w1
674-
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
675-
; CHECK-GI-NEXT: umov w0, v1.h[0]
676-
; CHECK-GI-NEXT: ret
660+
; CHECK-LABEL: test_vqrdmlahh_lane_s16:
661+
; CHECK: // %bb.0: // %entry
662+
; CHECK-NEXT: fmov s1, w0
663+
; CHECK-NEXT: fmov s2, w1
664+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
665+
; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
666+
; CHECK-NEXT: umov w0, v1.h[0]
667+
; CHECK-NEXT: ret
677668
entry:
678669
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
679670
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -719,7 +710,7 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
719710
;
720711
; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
721712
; CHECK-GI: // %bb.0: // %entry
722-
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
713+
; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
723714
; CHECK-GI-NEXT: fmov s1, w0
724715
; CHECK-GI-NEXT: fmov s2, w1
725716
; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
@@ -837,23 +828,14 @@ entry:
837828
}
838829

839830
define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
840-
; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
841-
; CHECK-SD: // %bb.0: // %entry
842-
; CHECK-SD-NEXT: fmov s1, w0
843-
; CHECK-SD-NEXT: fmov s2, w1
844-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
845-
; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
846-
; CHECK-SD-NEXT: umov w0, v1.h[0]
847-
; CHECK-SD-NEXT: ret
848-
;
849-
; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
850-
; CHECK-GI: // %bb.0: // %entry
851-
; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
852-
; CHECK-GI-NEXT: fmov s1, w0
853-
; CHECK-GI-NEXT: fmov s2, w1
854-
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
855-
; CHECK-GI-NEXT: umov w0, v1.h[0]
856-
; CHECK-GI-NEXT: ret
831+
; CHECK-LABEL: test_vqrdmlshh_lane_s16:
832+
; CHECK: // %bb.0: // %entry
833+
; CHECK-NEXT: fmov s1, w0
834+
; CHECK-NEXT: fmov s2, w1
835+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
836+
; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
837+
; CHECK-NEXT: umov w0, v1.h[0]
838+
; CHECK-NEXT: ret
857839
entry:
858840
%0 = insertelement <4 x i16> undef, i16 %a, i64 0
859841
%1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -899,7 +881,7 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
899881
;
900882
; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
901883
; CHECK-GI: // %bb.0: // %entry
902-
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
884+
; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
903885
; CHECK-GI-NEXT: fmov s1, w0
904886
; CHECK-GI-NEXT: fmov s2, w1
905887
; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h

llvm/test/CodeGen/AArch64/bitcast.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
1515
; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
1616
; CHECK-GI-NEXT: fmov s1, w8
1717
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
18-
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
18+
; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
1919
; CHECK-GI-NEXT: ret
2020
%1 = shufflevector <2 x i32> <i32 58712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
2121
; Can't optimize the following bitcast to scalar_to_vector.
@@ -35,7 +35,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
3535
; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
3636
; CHECK-GI-NEXT: fmov s1, w8
3737
; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
38-
; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
38+
; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
3939
; CHECK-GI-NEXT: ret
4040
%1 = shufflevector <2 x i32> <i32 712, i32 undef>, <2 x i32> %a, <2 x i32> <i32 0, i32 2>
4141
; Can't optimize the following bitcast to scalar_to_vector.

llvm/test/CodeGen/AArch64/dup.ll

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,13 +1025,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
10251025
;
10261026
; CHECK-GI-LABEL: duplane0_v3i64:
10271027
; CHECK-GI: // %bb.0: // %entry
1028-
; CHECK-GI-NEXT: fmov d2, d0
1028+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
10291029
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
1030-
; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
1031-
; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
1032-
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
1030+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
1031+
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
10331032
; CHECK-GI-NEXT: mov d1, v0.d[1]
1034-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
1033+
; CHECK-GI-NEXT: fmov d2, d0
10351034
; CHECK-GI-NEXT: ret
10361035
entry:
10371036
%c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -2354,13 +2353,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
23542353
;
23552354
; CHECK-GI-LABEL: duplane0_v3double:
23562355
; CHECK-GI: // %bb.0: // %entry
2357-
; CHECK-GI-NEXT: fmov d2, d0
2356+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
23582357
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
2359-
; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
2360-
; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
2361-
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
2358+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
2359+
; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
23622360
; CHECK-GI-NEXT: mov d1, v0.d[1]
2363-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
2361+
; CHECK-GI-NEXT: fmov d2, d0
23642362
; CHECK-GI-NEXT: ret
23652363
entry:
23662364
%c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer

llvm/test/CodeGen/AArch64/shufflevector.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,8 @@ define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
399399
; CHECK-GI-NEXT: fmov x9, d4
400400
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
401401
; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
402-
; CHECK-GI-NEXT: fmov d2, d5
402+
; CHECK-GI-NEXT: // kill: def $d5 killed $d5 def $q5
403+
; CHECK-GI-NEXT: dup v2.2d, v5.d[0]
403404
; CHECK-GI-NEXT: mov v0.d[1], x8
404405
; CHECK-GI-NEXT: mov v3.d[1], x9
405406
; CHECK-GI-NEXT: ext v0.16b, v0.16b, v3.16b, #8

0 commit comments

Comments
 (0)