diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index f84e83816bf33..940d18a17b244 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -172,8 +172,8 @@ def form_duplane : GICombineRule <
   (apply [{ applyDupLane(*${root}, MRI, B, ${matchinfo}); }])
 >;
 
-def shuffle_vector_lowering : GICombineGroup<[dup, rev, ext, zip, uzp, trn, fullrev,
-                                              form_duplane, shuf_to_ins]>;
+def shuffle_vector_lowering : GICombineGroup<[dup, form_duplane, rev, ext, zip,
+                                              uzp, trn, fullrev, shuf_to_ins]>;
 
 // Turn G_UNMERGE_VALUES -> G_EXTRACT_VECTOR_ELT's
 def vector_unmerge_lowering : GICombineRule <
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
index 14d44d85e06f3..8dedb26dac2e1 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-ext.mir
@@ -255,7 +255,8 @@ body: |
     ; CHECK: liveins: $q0, $q1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
-    ; CHECK-NEXT: %shuf:_(<8 x s16>) = G_REV64 %v1
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %shuf:_(<8 x s16>) = G_DUPLANE16 %v1, [[C]](s64)
     ; CHECK-NEXT: $q0 = COPY %shuf(<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %v1:_(<8 x s16>) = COPY $q0
@@ -298,8 +299,8 @@ body: |
     ; CHECK: liveins: $q0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %v1:_(<2 x s64>) = COPY $q0
-    ; CHECK-NEXT: %v2:_(<2 x s64>) = G_IMPLICIT_DEF
-    ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_TRN2 %v1, %v2
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK-NEXT: %shuf:_(<2 x s64>) = G_DUPLANE64 %v1, [[C]](s64)
     ; CHECK-NEXT: $q0 = COPY %shuf(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %v1:_(<2 x s64>) = COPY $q0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
index c5a6030155494..1d24f8acfbc53 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-rev.mir
@@ -38,8 +38,11 @@ body: |
     ; CHECK: liveins: $d0, $d1
     ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
-    ; CHECK-NEXT: [[REV64_:%[0-9]+]]:_(<2 x s32>) = G_REV64 [[COPY]]
-    ; CHECK-NEXT: $d0 = COPY [[REV64_]](<2 x s32>)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[COPY]](<2 x s32>), [[DEF]](<2 x s32>)
+    ; CHECK-NEXT: [[DUPLANE32_:%[0-9]+]]:_(<2 x s32>) = G_DUPLANE32 [[CONCAT_VECTORS]], [[C]](s64)
+    ; CHECK-NEXT: $d0 = COPY [[DUPLANE32_]](<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %0:_(<2 x s32>) = COPY $d0
     %1:_(<2 x s32>) = COPY $d1
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
index 17fb312c63754..0ede4bc7a4d6c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -139,7 +139,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-GI-NEXT: mov d1, v0.d[1]
 ; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
-; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
+; CHECK-GI-NEXT: dup v1.2s, v0.s[1]
 ; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
 ; CHECK-GI-NEXT: fmov w0, s0
 ; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
index 7b439dd36c425..bb97ba6d92651 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll
@@ -657,23 +657,14 @@ entry:
 }
 
 define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlahh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlahh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
 entry:
   %0 = insertelement <4 x i16> undef, i16 %a, i64 0
   %1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -719,7 +710,7 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
 ;
 ; CHECK-GI-LABEL: test_vqrdmlahh_laneq_s16:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
 ; CHECK-GI-NEXT: fmov s1, w0
 ; CHECK-GI-NEXT: fmov s2, w1
 ; CHECK-GI-NEXT: sqrdmlah v1.4h, v2.4h, v0.4h
@@ -837,23 +828,14 @@ entry:
 }
 
 define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) {
-; CHECK-SD-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: fmov s1, w0
-; CHECK-SD-NEXT: fmov s2, w1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
-; CHECK-SD-NEXT: umov w0, v1.h[0]
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: test_vqrdmlshh_lane_s16:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: rev64 v0.4h, v0.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: fmov s2, w1
-; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
-; CHECK-GI-NEXT: umov w0, v1.h[0]
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: test_vqrdmlshh_lane_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3]
+; CHECK-NEXT: umov w0, v1.h[0]
+; CHECK-NEXT: ret
 entry:
   %0 = insertelement <4 x i16> undef, i16 %a, i64 0
   %1 = insertelement <4 x i16> undef, i16 %b, i64 0
@@ -899,7 +881,7 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) {
 ;
 ; CHECK-GI-LABEL: test_vqrdmlshh_laneq_s16:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #14
+; CHECK-GI-NEXT: dup v0.8h, v0.h[7]
 ; CHECK-GI-NEXT: fmov s1, w0
 ; CHECK-GI-NEXT: fmov s2, w1
 ; CHECK-GI-NEXT: sqrdmlsh v1.4h, v2.4h, v0.4h
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index 38cec0d71a945..d2f72ecacc86c 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -15,7 +15,7 @@ define <4 x i16> @foo1(<2 x i32> %a) {
 ; CHECK-GI-NEXT: mov w8, #58712 // =0xe558
 ; CHECK-GI-NEXT: fmov s1, w8
 ; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
 ; CHECK-GI-NEXT: ret
   %1 = shufflevector <2 x i32> , <2 x i32> %a, <2 x i32> 
 ; Can't optimize the following bitcast to scalar_to_vector.
@@ -35,7 +35,7 @@ define <4 x i16> @foo2(<2 x i32> %a) {
 ; CHECK-GI-NEXT: mov w8, #712 // =0x2c8
 ; CHECK-GI-NEXT: fmov s1, w8
 ; CHECK-GI-NEXT: zip1 v0.2s, v1.2s, v0.2s
-; CHECK-GI-NEXT: rev32 v0.4h, v0.4h
+; CHECK-GI-NEXT: dup v0.4h, v0.h[1]
 ; CHECK-GI-NEXT: ret
   %1 = shufflevector <2 x i32> , <2 x i32> %a, <2 x i32> 
 ; Can't optimize the following bitcast to scalar_to_vector.
diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll
index 26e070f2a0acd..079ff1076b110 100644
--- a/llvm/test/CodeGen/AArch64/dup.ll
+++ b/llvm/test/CodeGen/AArch64/dup.ll
@@ -1025,13 +1025,12 @@ define <3 x i64> @duplane0_v3i64(<3 x i64> %b) {
 ;
 ; CHECK-GI-LABEL: duplane0_v3i64:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
 ; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov d2, d0
 ; CHECK-GI-NEXT: ret
 entry:
   %c = shufflevector <3 x i64> %b, <3 x i64> poison, <3 x i32> zeroinitializer
@@ -2354,13 +2353,12 @@ define <3 x double> @duplane0_v3double(<3 x double> %b) {
 ;
 ; CHECK-GI-LABEL: duplane0_v3double:
 ; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: fmov d2, d0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-GI-NEXT: mov v2.d[1], v1.d[0]
-; CHECK-GI-NEXT: dup v0.2d, v2.d[0]
-; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: dup v0.2d, v0.d[0]
 ; CHECK-GI-NEXT: mov d1, v0.d[1]
-; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: fmov d2, d0
 ; CHECK-GI-NEXT: ret
 entry:
   %c = shufflevector <3 x double> %b, <3 x double> poison, <3 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll
index 59cc400e8ac73..9fd5e65086782 100644
--- a/llvm/test/CodeGen/AArch64/shufflevector.ll
+++ b/llvm/test/CodeGen/AArch64/shufflevector.ll
@@ -399,7 +399,8 @@ define <3 x ptr> @shufflevector_v3p0(<3 x ptr> %a, <3 x ptr> %b) {
 ; CHECK-GI-NEXT: fmov x9, d4
 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
-; CHECK-GI-NEXT: fmov d2, d5
+; CHECK-GI-NEXT: // kill: def $d5 killed $d5 def $q5
+; CHECK-GI-NEXT: dup v2.2d, v5.d[0]
 ; CHECK-GI-NEXT: mov v0.d[1], x8
 ; CHECK-GI-NEXT: mov v3.d[1], x9
 ; CHECK-GI-NEXT: ext v0.16b, v0.16b, v3.16b, #8
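For reference, a minimal IR sketch of the kind of shuffle the GICombineGroup reordering above affects; this is not part of the patch, and the function name splat_lane7_v8i16 is hypothetical. With form_duplane now tried before the rev/ext/zip/uzp/trn matchers, GlobalISel is expected to lower a one-lane splat such as this to a lane dup (dup v0.8h, v0.h[7]) rather than an ext/rev sequence, mirroring the updated checks in arm64-neon-v8.1a.ll:

; Splat of lane 7 with the remaining lanes undef, so both the ext and the
; duplane matchers could apply; the new ordering lets form_duplane run first.
define <8 x i16> @splat_lane7_v8i16(<8 x i16> %v) {
entry:
  %s = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %s
}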