Skip to content

Commit d45db81

Browse files
committed
[AArch64][GlobalISel] Expand 64bit extracts to 128bit to allow more patterns.
SDAG promotes a 64-bit vector-element extract (the equivalent of G_EXTRACT_VECTOR_ELT) to 128 bits to reduce the number of duplicate lane-extract patterns needed. This patch does the same for GlobalISel inside RegBankSelect, so that instruction selection operates on the larger vectors. Most of the test changes just add kill markers, but arm64-neon-v8.1a.ll shows the lane-wise patterns now being selected.
1 parent 3894bdc commit d45db81

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+331
-592
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,26 @@ void AArch64RegisterBankInfo::applyMappingImpl(
399399
MI.getOperand(1).setReg(ConstReg);
400400
return applyDefaultMapping(OpdMapper);
401401
}
402+
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
403+
// SDAG will promote a 64bit G_EXTRACT_VECTOR_ELT to 128 to reduce the
404+
// number of duplicate lane-extract patterns needed. Do the same here so
405+
// that selection will operate on the larger vectors.
406+
Register Src = MI.getOperand(1).getReg();
407+
LLT SrcTy = MRI.getType(Src);
408+
assert(SrcTy.getSizeInBits() == 64 && "Expected 64-bit source vector");
409+
LLT DstTy = SrcTy.multiplyElements(2);
410+
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
411+
auto Undef = Builder.buildUndef(SrcTy);
412+
auto Concat = Builder.buildConcatVectors(DstTy, {Src, Undef.getReg(0)});
413+
MRI.setRegBank(Undef.getReg(0), getRegBank(AArch64::FPRRegBankID));
414+
MRI.setRegBank(Concat.getReg(0), getRegBank(AArch64::FPRRegBankID));
415+
for (MachineInstr &Ext :
416+
make_early_inc_range(MRI.use_nodbg_instructions(Src))) {
417+
if (Ext.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT)
418+
Ext.getOperand(1).setReg(Concat.getReg(0));
419+
}
420+
return applyDefaultMapping(OpdMapper);
421+
}
402422
default:
403423
llvm_unreachable("Don't know how to handle that operation");
404424
}
@@ -1014,14 +1034,20 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
10141034
}
10151035
break;
10161036
}
1017-
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1037+
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
10181038
// Destination and source need to be FPRs.
10191039
OpRegBankIdx[0] = PMI_FirstFPR;
10201040
OpRegBankIdx[1] = PMI_FirstFPR;
1021-
1022-
// Index needs to be a GPR.
1041+
// Index needs to be a GPR constant.
10231042
OpRegBankIdx[2] = PMI_FirstGPR;
1043+
// SDAG will promote a 64bit G_EXTRACT_VECTOR_ELT to 128 to reduce the
1044+
// number of duplicate lane-extract patterns needed. Do the same here so
1045+
// that selection will operate on the larger vectors.
1046+
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1047+
if (!Ty.isScalable() && Ty.getSizeInBits() == 64)
1048+
MappingID = CustomMappingID;
10241049
break;
1050+
}
10251051
case TargetOpcode::G_INSERT_VECTOR_ELT:
10261052
OpRegBankIdx[0] = PMI_FirstFPR;
10271053
OpRegBankIdx[1] = PMI_FirstFPR;

llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,9 @@ body: |
9494
; CHECK-NEXT: {{ $}}
9595
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0
9696
; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
97-
; CHECK-NEXT: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64)
97+
; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr(<4 x s16>) = G_IMPLICIT_DEF
98+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:fpr(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[DEF]](<4 x s16>)
99+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<8 x s16>), [[C]](s64)
98100
; CHECK-NEXT: $h0 = COPY [[EVEC]](s16)
99101
; CHECK-NEXT: RET_ReallyLR implicit $h0
100102
%0:_(<4 x s16>) = COPY $d0

llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7070
;
7171
; CHECK-GI-LABEL: test_bitf_v1i32:
7272
; CHECK-GI: // %bb.0:
73+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
74+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
75+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
7376
; CHECK-GI-NEXT: fmov w8, s2
7477
; CHECK-GI-NEXT: fmov w9, s1
7578
; CHECK-GI-NEXT: fmov w10, s0

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7070
;
7171
; CHECK-GI-LABEL: test_bit_v1i32:
7272
; CHECK-GI: // %bb.0:
73+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
74+
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
75+
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
7376
; CHECK-GI-NEXT: fmov w8, s2
7477
; CHECK-GI-NEXT: fmov w9, s1
7578
; CHECK-GI-NEXT: fmov w10, s0

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
243243
;
244244
; CHECK-GI-LABEL: abs_v1i32:
245245
; CHECK-GI: // %bb.0: // %entry
246+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
246247
; CHECK-GI-NEXT: fmov w8, s0
247248
; CHECK-GI-NEXT: fmov w9, s0
248249
; CHECK-GI-NEXT: cmp w8, #0

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,7 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
12491249
;
12501250
; CHECK-GI-LABEL: testDUP.v1i8:
12511251
; CHECK-GI: // %bb.0:
1252+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
12521253
; CHECK-GI-NEXT: fmov w8, s0
12531254
; CHECK-GI-NEXT: dup v0.8b, w8
12541255
; CHECK-GI-NEXT: ret
@@ -1744,7 +1745,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
17441745
; CHECK-GI-NEXT: mov v2.16b, v1.16b
17451746
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
17461747
; CHECK-GI-NEXT: adrp x8, .LCPI127_0
1747-
; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
1748+
; CHECK-GI-NEXT: mov b1, v0.b[0]
17481749
; CHECK-GI-NEXT: mov v1.b[1], v0.b[1]
17491750
; CHECK-GI-NEXT: mov v1.b[2], v0.b[2]
17501751
; CHECK-GI-NEXT: mov v1.b[3], v0.b[3]
@@ -1851,7 +1852,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
18511852
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
18521853
; CHECK-GI: // %bb.0: // %entry
18531854
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1854-
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
1855+
; CHECK-GI-NEXT: mov b2, v0.b[0]
18551856
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
18561857
; CHECK-GI-NEXT: mov v2.b[1], v0.b[1]
18571858
; CHECK-GI-NEXT: mov v2.b[2], v0.b[2]
@@ -1937,7 +1938,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
19371938
; CHECK-GI-NEXT: mov v2.16b, v1.16b
19381939
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
19391940
; CHECK-GI-NEXT: adrp x8, .LCPI131_0
1940-
; CHECK-GI-NEXT: mov v1.h[0], v0.h[0]
1941+
; CHECK-GI-NEXT: mov h1, v0.h[0]
19411942
; CHECK-GI-NEXT: mov v1.h[1], v0.h[1]
19421943
; CHECK-GI-NEXT: mov v1.h[2], v0.h[2]
19431944
; CHECK-GI-NEXT: mov v1.h[3], v0.h[3]
@@ -2008,7 +2009,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
20082009
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
20092010
; CHECK-GI: // %bb.0: // %entry
20102011
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
2011-
; CHECK-GI-NEXT: mov v2.h[0], v0.h[0]
2012+
; CHECK-GI-NEXT: mov h2, v0.h[0]
20122013
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
20132014
; CHECK-GI-NEXT: mov v2.h[1], v0.h[1]
20142015
; CHECK-GI-NEXT: mov v2.h[2], v0.h[2]
@@ -2070,7 +2071,7 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
20702071
; CHECK-GI-NEXT: mov v2.16b, v1.16b
20712072
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
20722073
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
2073-
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
2074+
; CHECK-GI-NEXT: mov s1, v0.s[0]
20742075
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
20752076
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI135_0]
20762077
; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
@@ -2276,6 +2277,7 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
22762277
;
22772278
; CHECK-GI-LABEL: concat_vector_v8i8:
22782279
; CHECK-GI: // %bb.0:
2280+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
22792281
; CHECK-GI-NEXT: fmov w8, s0
22802282
; CHECK-GI-NEXT: dup v0.8b, w8
22812283
; CHECK-GI-NEXT: ret
@@ -2302,6 +2304,7 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
23022304
;
23032305
; CHECK-GI-LABEL: concat_vector_v16i8:
23042306
; CHECK-GI: // %bb.0:
2307+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
23052308
; CHECK-GI-NEXT: fmov w8, s0
23062309
; CHECK-GI-NEXT: dup v0.16b, w8
23072310
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll

Lines changed: 15 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -614,16 +614,11 @@ entry:
614614
}
615615

616616
define void @test_vst1_lane0_s16(ptr %a, <4 x i16> %b) {
617-
; CHECK-GI-LABEL: test_vst1_lane0_s16:
618-
; CHECK-GI: // %bb.0: // %entry
619-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
620-
; CHECK-GI-NEXT: str h0, [x0]
621-
; CHECK-GI-NEXT: ret
622-
;
623-
; CHECK-SD-LABEL: test_vst1_lane0_s16:
624-
; CHECK-SD: // %bb.0: // %entry
625-
; CHECK-SD-NEXT: str h0, [x0]
626-
; CHECK-SD-NEXT: ret
617+
; CHECK-LABEL: test_vst1_lane0_s16:
618+
; CHECK: // %bb.0: // %entry
619+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
620+
; CHECK-NEXT: str h0, [x0]
621+
; CHECK-NEXT: ret
627622
entry:
628623
%0 = extractelement <4 x i16> %b, i32 0
629624
store i16 %0, ptr %a, align 2
@@ -643,16 +638,11 @@ entry:
643638
}
644639

645640
define void @test_vst1_lane0_s32(ptr %a, <2 x i32> %b) {
646-
; CHECK-GI-LABEL: test_vst1_lane0_s32:
647-
; CHECK-GI: // %bb.0: // %entry
648-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
649-
; CHECK-GI-NEXT: str s0, [x0]
650-
; CHECK-GI-NEXT: ret
651-
;
652-
; CHECK-SD-LABEL: test_vst1_lane0_s32:
653-
; CHECK-SD: // %bb.0: // %entry
654-
; CHECK-SD-NEXT: str s0, [x0]
655-
; CHECK-SD-NEXT: ret
641+
; CHECK-LABEL: test_vst1_lane0_s32:
642+
; CHECK: // %bb.0: // %entry
643+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
644+
; CHECK-NEXT: str s0, [x0]
645+
; CHECK-NEXT: ret
656646
entry:
657647
%0 = extractelement <2 x i32> %b, i32 0
658648
store i32 %0, ptr %a, align 4
@@ -683,16 +673,11 @@ entry:
683673
}
684674

685675
define void @test_vst1_lane0_f32(ptr %a, <2 x float> %b) {
686-
; CHECK-GI-LABEL: test_vst1_lane0_f32:
687-
; CHECK-GI: // %bb.0: // %entry
688-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
689-
; CHECK-GI-NEXT: str s0, [x0]
690-
; CHECK-GI-NEXT: ret
691-
;
692-
; CHECK-SD-LABEL: test_vst1_lane0_f32:
693-
; CHECK-SD: // %bb.0: // %entry
694-
; CHECK-SD-NEXT: str s0, [x0]
695-
; CHECK-SD-NEXT: ret
676+
; CHECK-LABEL: test_vst1_lane0_f32:
677+
; CHECK: // %bb.0: // %entry
678+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
679+
; CHECK-NEXT: str s0, [x0]
680+
; CHECK-NEXT: ret
696681
entry:
697682
%0 = extractelement <2 x float> %b, i32 0
698683
store float %0, ptr %a, align 4

llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll

Lines changed: 16 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -704,24 +704,14 @@ entry:
704704
}
705705

706706
define i32 @test_vqrdmlahs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
707-
; CHECK-SD-LABEL: test_vqrdmlahs_lane_s32:
708-
; CHECK-SD: // %bb.0: // %entry
709-
; CHECK-SD-NEXT: fmov s1, w0
710-
; CHECK-SD-NEXT: fmov s2, w1
711-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
712-
; CHECK-SD-NEXT: sqrdmlah s1, s2, v0.s[1]
713-
; CHECK-SD-NEXT: fmov w0, s1
714-
; CHECK-SD-NEXT: ret
715-
;
716-
; CHECK-GI-LABEL: test_vqrdmlahs_lane_s32:
717-
; CHECK-GI: // %bb.0: // %entry
718-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
719-
; CHECK-GI-NEXT: fmov s1, w0
720-
; CHECK-GI-NEXT: fmov s2, w1
721-
; CHECK-GI-NEXT: mov s0, v0.s[1]
722-
; CHECK-GI-NEXT: sqrdmlah s1, s2, s0
723-
; CHECK-GI-NEXT: fmov w0, s1
724-
; CHECK-GI-NEXT: ret
707+
; CHECK-LABEL: test_vqrdmlahs_lane_s32:
708+
; CHECK: // %bb.0: // %entry
709+
; CHECK-NEXT: fmov s1, w0
710+
; CHECK-NEXT: fmov s2, w1
711+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
712+
; CHECK-NEXT: sqrdmlah s1, s2, v0.s[1]
713+
; CHECK-NEXT: fmov w0, s1
714+
; CHECK-NEXT: ret
725715
entry:
726716
%vget_lane = extractelement <2 x i32> %c, i64 1
727717
%vqrdmlahs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 %a, i32 %b, i32 %vget_lane) #4
@@ -884,24 +874,14 @@ entry:
884874
}
885875

886876
define i32 @test_vqrdmlshs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
887-
; CHECK-SD-LABEL: test_vqrdmlshs_lane_s32:
888-
; CHECK-SD: // %bb.0: // %entry
889-
; CHECK-SD-NEXT: fmov s1, w0
890-
; CHECK-SD-NEXT: fmov s2, w1
891-
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
892-
; CHECK-SD-NEXT: sqrdmlsh s1, s2, v0.s[1]
893-
; CHECK-SD-NEXT: fmov w0, s1
894-
; CHECK-SD-NEXT: ret
895-
;
896-
; CHECK-GI-LABEL: test_vqrdmlshs_lane_s32:
897-
; CHECK-GI: // %bb.0: // %entry
898-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
899-
; CHECK-GI-NEXT: fmov s1, w0
900-
; CHECK-GI-NEXT: fmov s2, w1
901-
; CHECK-GI-NEXT: mov s0, v0.s[1]
902-
; CHECK-GI-NEXT: sqrdmlsh s1, s2, s0
903-
; CHECK-GI-NEXT: fmov w0, s1
904-
; CHECK-GI-NEXT: ret
877+
; CHECK-LABEL: test_vqrdmlshs_lane_s32:
878+
; CHECK: // %bb.0: // %entry
879+
; CHECK-NEXT: fmov s1, w0
880+
; CHECK-NEXT: fmov s2, w1
881+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
882+
; CHECK-NEXT: sqrdmlsh s1, s2, v0.s[1]
883+
; CHECK-NEXT: fmov w0, s1
884+
; CHECK-NEXT: ret
905885
entry:
906886
%vget_lane = extractelement <2 x i32> %c, i64 1
907887
%vqrdmlshs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %a, i32 %b, i32 %vget_lane) #4

llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
271271
;
272272
; GISEL-LABEL: test_vcvt_f16_f32:
273273
; GISEL: // %bb.0:
274+
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
274275
; GISEL-NEXT: fcvt h0, s0
275276
; GISEL-NEXT: ret
276277
%tmp = fptrunc <1 x float> %x to <1 x half>

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
207207
;
208208
; CHECK-GI-LABEL: bswap_v1i32:
209209
; CHECK-GI: // %bb.0: // %entry
210+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
210211
; CHECK-GI-NEXT: fmov w8, s0
211212
; CHECK-GI-NEXT: rev w8, w8
212213
; CHECK-GI-NEXT: mov v0.s[0], w8

llvm/test/CodeGen/AArch64/concat-vector.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
1313
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1414
; CHECK-GI-NEXT: mov w8, v0.s[1]
1515
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
16-
; CHECK-GI-NEXT: mov w9, v1.s[1]
1716
; CHECK-GI-NEXT: mov v0.h[1], w8
18-
; CHECK-GI-NEXT: fmov w8, s1
19-
; CHECK-GI-NEXT: mov v0.h[2], w8
20-
; CHECK-GI-NEXT: mov v0.h[3], w9
17+
; CHECK-GI-NEXT: mov w8, v1.s[1]
18+
; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
19+
; CHECK-GI-NEXT: mov v0.h[3], w8
2120
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
2221
; CHECK-GI-NEXT: ret
2322
%v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

llvm/test/CodeGen/AArch64/double_reduct.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,8 @@ define float @fmul_f32(<8 x float> %a, <4 x float> %b) {
6565
; CHECK-GI-NEXT: mov d1, v0.d[1]
6666
; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
6767
; CHECK-GI-NEXT: fmul v1.2s, v2.2s, v3.2s
68-
; CHECK-GI-NEXT: mov s2, v0.s[1]
69-
; CHECK-GI-NEXT: mov s3, v1.s[1]
70-
; CHECK-GI-NEXT: fmul s0, s0, s2
71-
; CHECK-GI-NEXT: fmul s1, s1, s3
68+
; CHECK-GI-NEXT: fmul s0, s0, v0.s[1]
69+
; CHECK-GI-NEXT: fmul s1, s1, v1.s[1]
7270
; CHECK-GI-NEXT: fmul s0, s0, s1
7371
; CHECK-GI-NEXT: ret
7472
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a)
@@ -92,10 +90,8 @@ define float @fmul_f32_same(<4 x float> %a, <4 x float> %b) {
9290
; CHECK-GI-NEXT: mov d3, v1.d[1]
9391
; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s
9492
; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
95-
; CHECK-GI-NEXT: mov s2, v0.s[1]
96-
; CHECK-GI-NEXT: mov s3, v1.s[1]
97-
; CHECK-GI-NEXT: fmul s0, s0, s2
98-
; CHECK-GI-NEXT: fmul s1, s1, s3
93+
; CHECK-GI-NEXT: fmul s0, s0, v0.s[1]
94+
; CHECK-GI-NEXT: fmul s1, s1, v1.s[1]
9995
; CHECK-GI-NEXT: fmul s0, s0, s1
10096
; CHECK-GI-NEXT: ret
10197
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
@@ -922,10 +918,8 @@ define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d)
922918
; CHECK-GI-NEXT: mov d5, v1.d[1]
923919
; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v4.2s
924920
; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v5.2s
925-
; CHECK-GI-NEXT: mov s4, v0.s[1]
926-
; CHECK-GI-NEXT: mov s5, v1.s[1]
927-
; CHECK-GI-NEXT: fmul s0, s0, s4
928-
; CHECK-GI-NEXT: fmul s1, s1, s5
921+
; CHECK-GI-NEXT: fmul s0, s0, v0.s[1]
922+
; CHECK-GI-NEXT: fmul s1, s1, v1.s[1]
929923
; CHECK-GI-NEXT: fmul s0, s0, s2
930924
; CHECK-GI-NEXT: fmul s1, s1, s3
931925
; CHECK-GI-NEXT: fmul s0, s0, s1

0 commit comments

Comments
 (0)