Skip to content

Commit 1311527

Browse files
committed
Revert "[AArch64][GlobalISel] Expand 64bit extracts to 128bit to allow more patterns (#142904)"
This reverts commit 61cdba6 due to verifier issues.
1 parent e9bd1ae commit 1311527

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+592
-334
lines changed

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 3 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -399,26 +399,6 @@ void AArch64RegisterBankInfo::applyMappingImpl(
399399
MI.getOperand(1).setReg(ConstReg);
400400
return applyDefaultMapping(OpdMapper);
401401
}
402-
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
403-
// SDAG will promote a 64bit G_EXTRACT_VECTOR_ELT to 128 to reduce the
404-
// number of duplicate lane-extract patterns needed. Do the same here so
405-
// that selection will operate on the larger vectors.
406-
Register Src = MI.getOperand(1).getReg();
407-
LLT SrcTy = MRI.getType(Src);
408-
assert(SrcTy.getSizeInBits() == 64 && "Expected 64-bit source vector");
409-
LLT DstTy = SrcTy.multiplyElements(2);
410-
Builder.setInsertPt(*MI.getParent(), MI.getIterator());
411-
auto Undef = Builder.buildUndef(SrcTy);
412-
auto Concat = Builder.buildConcatVectors(DstTy, {Src, Undef.getReg(0)});
413-
MRI.setRegBank(Undef.getReg(0), getRegBank(AArch64::FPRRegBankID));
414-
MRI.setRegBank(Concat.getReg(0), getRegBank(AArch64::FPRRegBankID));
415-
for (MachineInstr &Ext :
416-
make_early_inc_range(MRI.use_nodbg_instructions(Src))) {
417-
if (Ext.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT)
418-
Ext.getOperand(1).setReg(Concat.getReg(0));
419-
}
420-
return applyDefaultMapping(OpdMapper);
421-
}
422402
default:
423403
llvm_unreachable("Don't know how to handle that operation");
424404
}
@@ -1034,20 +1014,14 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
10341014
}
10351015
break;
10361016
}
1037-
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1017+
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
10381018
// Destination and source need to be FPRs.
10391019
OpRegBankIdx[0] = PMI_FirstFPR;
10401020
OpRegBankIdx[1] = PMI_FirstFPR;
1041-
// Index needs to be a GPR constant.
1021+
1022+
// Index needs to be a GPR.
10421023
OpRegBankIdx[2] = PMI_FirstGPR;
1043-
// SDAG will promote a 64bit G_EXTRACT_VECTOR_ELT to 128 to reduce the
1044-
// number of duplicate lane-extract patterns needed. Do the same here so
1045-
// that selection will operate on the larger vectors.
1046-
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1047-
if (!Ty.isScalable() && Ty.getSizeInBits() == 64)
1048-
MappingID = CustomMappingID;
10491024
break;
1050-
}
10511025
case TargetOpcode::G_INSERT_VECTOR_ELT:
10521026
OpRegBankIdx[0] = PMI_FirstFPR;
10531027
OpRegBankIdx[1] = PMI_FirstFPR;

llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract-vector-elt.mir

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -94,9 +94,7 @@ body: |
9494
; CHECK-NEXT: {{ $}}
9595
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(<4 x s16>) = COPY $d0
9696
; CHECK-NEXT: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 1
97-
; CHECK-NEXT: [[DEF:%[0-9]+]]:fpr(<4 x s16>) = G_IMPLICIT_DEF
98-
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:fpr(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<4 x s16>), [[DEF]](<4 x s16>)
99-
; CHECK-NEXT: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[CONCAT_VECTORS]](<8 x s16>), [[C]](s64)
97+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:fpr(s16) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s16>), [[C]](s64)
10098
; CHECK-NEXT: $h0 = COPY [[EVEC]](s16)
10199
; CHECK-NEXT: RET_ReallyLR implicit $h0
102100
%0:_(<4 x s16>) = COPY $d0

llvm/test/CodeGen/AArch64/aarch64-bif-gen.ll

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -70,9 +70,6 @@ define <1 x i32> @test_bitf_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7070
;
7171
; CHECK-GI-LABEL: test_bitf_v1i32:
7272
; CHECK-GI: // %bb.0:
73-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
74-
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
75-
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
7673
; CHECK-GI-NEXT: fmov w8, s2
7774
; CHECK-GI-NEXT: fmov w9, s1
7875
; CHECK-GI-NEXT: fmov w10, s0

llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -70,9 +70,6 @@ define <1 x i32> @test_bit_v1i32(<1 x i32> %A, <1 x i32> %B, <1 x i32> %C) {
7070
;
7171
; CHECK-GI-LABEL: test_bit_v1i32:
7272
; CHECK-GI: // %bb.0:
73-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
74-
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
75-
; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
7673
; CHECK-GI-NEXT: fmov w8, s2
7774
; CHECK-GI-NEXT: fmov w9, s1
7875
; CHECK-GI-NEXT: fmov w10, s0

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -243,7 +243,6 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
243243
;
244244
; CHECK-GI-LABEL: abs_v1i32:
245245
; CHECK-GI: // %bb.0: // %entry
246-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
247246
; CHECK-GI-NEXT: fmov w8, s0
248247
; CHECK-GI-NEXT: fmov w9, s0
249248
; CHECK-GI-NEXT: cmp w8, #0

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1215,7 +1215,6 @@ define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
12151215
;
12161216
; CHECK-GI-LABEL: testDUP.v1i8:
12171217
; CHECK-GI: // %bb.0:
1218-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
12191218
; CHECK-GI-NEXT: fmov w8, s0
12201219
; CHECK-GI-NEXT: dup v0.8b, w8
12211220
; CHECK-GI-NEXT: ret
@@ -1711,7 +1710,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
17111710
; CHECK-GI-NEXT: mov v2.16b, v1.16b
17121711
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
17131712
; CHECK-GI-NEXT: adrp x8, .LCPI127_0
1714-
; CHECK-GI-NEXT: mov b1, v0.b[0]
1713+
; CHECK-GI-NEXT: mov v1.b[0], v0.b[0]
17151714
; CHECK-GI-NEXT: mov v1.b[1], v0.b[1]
17161715
; CHECK-GI-NEXT: mov v1.b[2], v0.b[2]
17171716
; CHECK-GI-NEXT: mov v1.b[3], v0.b[3]
@@ -1818,7 +1817,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
18181817
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v8i8:
18191818
; CHECK-GI: // %bb.0: // %entry
18201819
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1821-
; CHECK-GI-NEXT: mov b2, v0.b[0]
1820+
; CHECK-GI-NEXT: mov v2.b[0], v0.b[0]
18221821
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
18231822
; CHECK-GI-NEXT: mov v2.b[1], v0.b[1]
18241823
; CHECK-GI-NEXT: mov v2.b[2], v0.b[2]
@@ -1904,7 +1903,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
19041903
; CHECK-GI-NEXT: mov v2.16b, v1.16b
19051904
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
19061905
; CHECK-GI-NEXT: adrp x8, .LCPI131_0
1907-
; CHECK-GI-NEXT: mov h1, v0.h[0]
1906+
; CHECK-GI-NEXT: mov v1.h[0], v0.h[0]
19081907
; CHECK-GI-NEXT: mov v1.h[1], v0.h[1]
19091908
; CHECK-GI-NEXT: mov v1.h[2], v0.h[2]
19101909
; CHECK-GI-NEXT: mov v1.h[3], v0.h[3]
@@ -1975,7 +1974,7 @@ define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
19751974
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v4i16:
19761975
; CHECK-GI: // %bb.0: // %entry
19771976
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1978-
; CHECK-GI-NEXT: mov h2, v0.h[0]
1977+
; CHECK-GI-NEXT: mov v2.h[0], v0.h[0]
19791978
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
19801979
; CHECK-GI-NEXT: mov v2.h[1], v0.h[1]
19811980
; CHECK-GI-NEXT: mov v2.h[2], v0.h[2]
@@ -2037,7 +2036,7 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
20372036
; CHECK-GI-NEXT: mov v2.16b, v1.16b
20382037
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
20392038
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
2040-
; CHECK-GI-NEXT: mov s1, v0.s[0]
2039+
; CHECK-GI-NEXT: mov v1.s[0], v0.s[0]
20412040
; CHECK-GI-NEXT: mov v1.s[1], v0.s[1]
20422041
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI135_0]
20432042
; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b
@@ -2243,7 +2242,6 @@ define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
22432242
;
22442243
; CHECK-GI-LABEL: concat_vector_v8i8:
22452244
; CHECK-GI: // %bb.0:
2246-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
22472245
; CHECK-GI-NEXT: fmov w8, s0
22482246
; CHECK-GI-NEXT: dup v0.8b, w8
22492247
; CHECK-GI-NEXT: ret
@@ -2270,7 +2268,6 @@ define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
22702268
;
22712269
; CHECK-GI-LABEL: concat_vector_v16i8:
22722270
; CHECK-GI: // %bb.0:
2273-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
22742271
; CHECK-GI-NEXT: fmov w8, s0
22752272
; CHECK-GI-NEXT: dup v0.16b, w8
22762273
; CHECK-GI-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll

Lines changed: 30 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -614,11 +614,16 @@ entry:
614614
}
615615

616616
define void @test_vst1_lane0_s16(ptr %a, <4 x i16> %b) {
617-
; CHECK-LABEL: test_vst1_lane0_s16:
618-
; CHECK: // %bb.0: // %entry
619-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
620-
; CHECK-NEXT: str h0, [x0]
621-
; CHECK-NEXT: ret
617+
; CHECK-GI-LABEL: test_vst1_lane0_s16:
618+
; CHECK-GI: // %bb.0: // %entry
619+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
620+
; CHECK-GI-NEXT: str h0, [x0]
621+
; CHECK-GI-NEXT: ret
622+
;
623+
; CHECK-SD-LABEL: test_vst1_lane0_s16:
624+
; CHECK-SD: // %bb.0: // %entry
625+
; CHECK-SD-NEXT: str h0, [x0]
626+
; CHECK-SD-NEXT: ret
622627
entry:
623628
%0 = extractelement <4 x i16> %b, i32 0
624629
store i16 %0, ptr %a, align 2
@@ -638,11 +643,16 @@ entry:
638643
}
639644

640645
define void @test_vst1_lane0_s32(ptr %a, <2 x i32> %b) {
641-
; CHECK-LABEL: test_vst1_lane0_s32:
642-
; CHECK: // %bb.0: // %entry
643-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
644-
; CHECK-NEXT: str s0, [x0]
645-
; CHECK-NEXT: ret
646+
; CHECK-GI-LABEL: test_vst1_lane0_s32:
647+
; CHECK-GI: // %bb.0: // %entry
648+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
649+
; CHECK-GI-NEXT: str s0, [x0]
650+
; CHECK-GI-NEXT: ret
651+
;
652+
; CHECK-SD-LABEL: test_vst1_lane0_s32:
653+
; CHECK-SD: // %bb.0: // %entry
654+
; CHECK-SD-NEXT: str s0, [x0]
655+
; CHECK-SD-NEXT: ret
646656
entry:
647657
%0 = extractelement <2 x i32> %b, i32 0
648658
store i32 %0, ptr %a, align 4
@@ -673,11 +683,16 @@ entry:
673683
}
674684

675685
define void @test_vst1_lane0_f32(ptr %a, <2 x float> %b) {
676-
; CHECK-LABEL: test_vst1_lane0_f32:
677-
; CHECK: // %bb.0: // %entry
678-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
679-
; CHECK-NEXT: str s0, [x0]
680-
; CHECK-NEXT: ret
686+
; CHECK-GI-LABEL: test_vst1_lane0_f32:
687+
; CHECK-GI: // %bb.0: // %entry
688+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
689+
; CHECK-GI-NEXT: str s0, [x0]
690+
; CHECK-GI-NEXT: ret
691+
;
692+
; CHECK-SD-LABEL: test_vst1_lane0_f32:
693+
; CHECK-SD: // %bb.0: // %entry
694+
; CHECK-SD-NEXT: str s0, [x0]
695+
; CHECK-SD-NEXT: ret
681696
entry:
682697
%0 = extractelement <2 x float> %b, i32 0
683698
store float %0, ptr %a, align 4

llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll

Lines changed: 36 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -663,14 +663,24 @@ entry:
663663
}
664664

665665
define i32 @test_vqrdmlahs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
666-
; CHECK-LABEL: test_vqrdmlahs_lane_s32:
667-
; CHECK: // %bb.0: // %entry
668-
; CHECK-NEXT: fmov s1, w0
669-
; CHECK-NEXT: fmov s2, w1
670-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
671-
; CHECK-NEXT: sqrdmlah s1, s2, v0.s[1]
672-
; CHECK-NEXT: fmov w0, s1
673-
; CHECK-NEXT: ret
666+
; CHECK-SD-LABEL: test_vqrdmlahs_lane_s32:
667+
; CHECK-SD: // %bb.0: // %entry
668+
; CHECK-SD-NEXT: fmov s1, w0
669+
; CHECK-SD-NEXT: fmov s2, w1
670+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
671+
; CHECK-SD-NEXT: sqrdmlah s1, s2, v0.s[1]
672+
; CHECK-SD-NEXT: fmov w0, s1
673+
; CHECK-SD-NEXT: ret
674+
;
675+
; CHECK-GI-LABEL: test_vqrdmlahs_lane_s32:
676+
; CHECK-GI: // %bb.0: // %entry
677+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
678+
; CHECK-GI-NEXT: fmov s1, w0
679+
; CHECK-GI-NEXT: fmov s2, w1
680+
; CHECK-GI-NEXT: mov s0, v0.s[1]
681+
; CHECK-GI-NEXT: sqrdmlah s1, s2, s0
682+
; CHECK-GI-NEXT: fmov w0, s1
683+
; CHECK-GI-NEXT: ret
674684
entry:
675685
%vget_lane = extractelement <2 x i32> %c, i64 1
676686
%vqrdmlahs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 %a, i32 %b, i32 %vget_lane) #4
@@ -803,14 +813,24 @@ entry:
803813
}
804814

805815
define i32 @test_vqrdmlshs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) {
806-
; CHECK-LABEL: test_vqrdmlshs_lane_s32:
807-
; CHECK: // %bb.0: // %entry
808-
; CHECK-NEXT: fmov s1, w0
809-
; CHECK-NEXT: fmov s2, w1
810-
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
811-
; CHECK-NEXT: sqrdmlsh s1, s2, v0.s[1]
812-
; CHECK-NEXT: fmov w0, s1
813-
; CHECK-NEXT: ret
816+
; CHECK-SD-LABEL: test_vqrdmlshs_lane_s32:
817+
; CHECK-SD: // %bb.0: // %entry
818+
; CHECK-SD-NEXT: fmov s1, w0
819+
; CHECK-SD-NEXT: fmov s2, w1
820+
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
821+
; CHECK-SD-NEXT: sqrdmlsh s1, s2, v0.s[1]
822+
; CHECK-SD-NEXT: fmov w0, s1
823+
; CHECK-SD-NEXT: ret
824+
;
825+
; CHECK-GI-LABEL: test_vqrdmlshs_lane_s32:
826+
; CHECK-GI: // %bb.0: // %entry
827+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
828+
; CHECK-GI-NEXT: fmov s1, w0
829+
; CHECK-GI-NEXT: fmov s2, w1
830+
; CHECK-GI-NEXT: mov s0, v0.s[1]
831+
; CHECK-GI-NEXT: sqrdmlsh s1, s2, s0
832+
; CHECK-GI-NEXT: fmov w0, s1
833+
; CHECK-GI-NEXT: ret
814834
entry:
815835
%vget_lane = extractelement <2 x i32> %c, i64 1
816836
%vqrdmlshs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %a, i32 %b, i32 %vget_lane) #4
@@ -847,6 +867,3 @@ entry:
847867
%vqrdmlshs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %a, i32 %b, i32 %vgetq_lane) #4
848868
ret i32 %vqrdmlshs_s32.i
849869
}
850-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
851-
; CHECK-GI: {{.*}}
852-
; CHECK-SD: {{.*}}

llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -271,7 +271,6 @@ define half @test_vcvt_f16_f32(<1 x float> %x) {
271271
;
272272
; GISEL-LABEL: test_vcvt_f16_f32:
273273
; GISEL: // %bb.0:
274-
; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
275274
; GISEL-NEXT: fcvt h0, s0
276275
; GISEL-NEXT: ret
277276
%tmp = fptrunc <1 x float> %x to <1 x half>

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,6 @@ define <1 x i32> @bswap_v1i32(<1 x i32> %a){
207207
;
208208
; CHECK-GI-LABEL: bswap_v1i32:
209209
; CHECK-GI: // %bb.0: // %entry
210-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
211210
; CHECK-GI-NEXT: fmov w8, s0
212211
; CHECK-GI-NEXT: rev w8, w8
213212
; CHECK-GI-NEXT: fmov s0, w8

llvm/test/CodeGen/AArch64/concat-vector.ll

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,11 @@ define <4 x i8> @concat1(<2 x i8> %A, <2 x i8> %B) {
1313
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
1414
; CHECK-GI-NEXT: mov w8, v0.s[1]
1515
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
16+
; CHECK-GI-NEXT: mov w9, v1.s[1]
1617
; CHECK-GI-NEXT: mov v0.h[1], w8
17-
; CHECK-GI-NEXT: mov w8, v1.s[1]
18-
; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
19-
; CHECK-GI-NEXT: mov v0.h[3], w8
18+
; CHECK-GI-NEXT: fmov w8, s1
19+
; CHECK-GI-NEXT: mov v0.h[2], w8
20+
; CHECK-GI-NEXT: mov v0.h[3], w9
2021
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
2122
; CHECK-GI-NEXT: ret
2223
%v4i8 = shufflevector <2 x i8> %A, <2 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

llvm/test/CodeGen/AArch64/double_reduct.ll

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,10 @@ define float @fmul_f32(<8 x float> %a, <4 x float> %b) {
6565
; CHECK-GI-NEXT: mov d1, v0.d[1]
6666
; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
6767
; CHECK-GI-NEXT: fmul v1.2s, v2.2s, v3.2s
68-
; CHECK-GI-NEXT: fmul s0, s0, v0.s[1]
69-
; CHECK-GI-NEXT: fmul s1, s1, v1.s[1]
68+
; CHECK-GI-NEXT: mov s2, v0.s[1]
69+
; CHECK-GI-NEXT: mov s3, v1.s[1]
70+
; CHECK-GI-NEXT: fmul s0, s0, s2
71+
; CHECK-GI-NEXT: fmul s1, s1, s3
7072
; CHECK-GI-NEXT: fmul s0, s0, s1
7173
; CHECK-GI-NEXT: ret
7274
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a)
@@ -90,8 +92,10 @@ define float @fmul_f32_same(<4 x float> %a, <4 x float> %b) {
9092
; CHECK-GI-NEXT: mov d3, v1.d[1]
9193
; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v2.2s
9294
; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v3.2s
93-
; CHECK-GI-NEXT: fmul s0, s0, v0.s[1]
94-
; CHECK-GI-NEXT: fmul s1, s1, v1.s[1]
95+
; CHECK-GI-NEXT: mov s2, v0.s[1]
96+
; CHECK-GI-NEXT: mov s3, v1.s[1]
97+
; CHECK-GI-NEXT: fmul s0, s0, s2
98+
; CHECK-GI-NEXT: fmul s1, s1, s3
9599
; CHECK-GI-NEXT: fmul s0, s0, s1
96100
; CHECK-GI-NEXT: ret
97101
%r1 = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
@@ -918,8 +922,10 @@ define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d)
918922
; CHECK-GI-NEXT: mov d5, v1.d[1]
919923
; CHECK-GI-NEXT: fmul v0.2s, v0.2s, v4.2s
920924
; CHECK-GI-NEXT: fmul v1.2s, v1.2s, v5.2s
921-
; CHECK-GI-NEXT: fmul s0, s0, v0.s[1]
922-
; CHECK-GI-NEXT: fmul s1, s1, v1.s[1]
925+
; CHECK-GI-NEXT: mov s4, v0.s[1]
926+
; CHECK-GI-NEXT: mov s5, v1.s[1]
927+
; CHECK-GI-NEXT: fmul s0, s0, s4
928+
; CHECK-GI-NEXT: fmul s1, s1, s5
923929
; CHECK-GI-NEXT: fmul s0, s0, s2
924930
; CHECK-GI-NEXT: fmul s1, s1, s3
925931
; CHECK-GI-NEXT: fmul s0, s0, s1

0 commit comments

Comments
 (0)