Skip to content

Commit 9b016e3

Browse files
authored
[ARM] Add early-clobber to MVE VCMLA.f32 (#114995)
This instruction (but not the f16 variant) cannot use the same register for the output as either of the inputs, so it needs to be marked as early-clobber.
1 parent 2f743ac commit 9b016e3

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3583,10 +3583,10 @@ def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 ha
35833583
defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
35843584
defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
35853585

3586-
class MVE_VCMLA<string suffix, bits<2> size>
3586+
class MVE_VCMLA<string suffix, bits<2> size, string cstr>
35873587
: MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
35883588
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
3589-
"$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
3589+
"$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src"#cstr, size, []> {
35903590
bits<4> Qd;
35913591
bits<4> Qn;
35923592
bits<2> rot;
@@ -3603,8 +3603,8 @@ class MVE_VCMLA<string suffix, bits<2> size>
36033603
let Inst{4} = 0b0;
36043604
}
36053605

3606-
multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
3607-
def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
3606+
multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, string cstr=""> {
3607+
def "" : MVE_VCMLA<VTI.Suffix, VTI.Size, cstr>;
36083608
defvar Inst = !cast<Instruction>(NAME);
36093609

36103610
let Predicates = [HasMVEFloat] in {
@@ -3633,7 +3633,7 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
36333633
}
36343634

36353635
defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
3636-
defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
3636+
defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, ",@earlyclobber $Qd">;
36373637

36383638
class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
36393639
bit bit_8, bit bit_21, dag iops=(ins),

llvm/test/CodeGen/Thumb2/mve-vcmla.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,27 @@ entry:
121121
%res = fadd <4 x float> %d, %a
122122
ret <4 x float> %res
123123
}
124+
125+
define arm_aapcs_vfpcc <8 x half> @same_register_f16(<8 x half> %a) {
126+
; CHECK-LABEL: same_register_f16:
127+
; CHECK: @ %bb.0: @ %entry
128+
; CHECK-NEXT: vcmla.f16 q0, q0, q0, #0
129+
; CHECK-NEXT: bx lr
130+
entry:
131+
%d = tail call <8 x half> @llvm.arm.mve.vcmlaq.v8f16(i32 0, <8 x half> zeroinitializer, <8 x half> %a, <8 x half> %a)
132+
%res = fadd fast <8 x half> %d, %a
133+
ret <8 x half> %res
134+
}
135+
136+
define arm_aapcs_vfpcc <4 x float> @same_register_f32(<4 x float> %a) {
137+
; CHECK-LABEL: same_register_f32:
138+
; CHECK: @ %bb.0: @ %entry
139+
; CHECK-NEXT: vmov q1, q0
140+
; CHECK-NEXT: vcmla.f32 q1, q0, q0, #0
141+
; CHECK-NEXT: vmov q0, q1
142+
; CHECK-NEXT: bx lr
143+
entry:
144+
%d = tail call <4 x float> @llvm.arm.mve.vcmlaq.v4f32(i32 0, <4 x float> zeroinitializer, <4 x float> %a, <4 x float> %a)
145+
%res = fadd fast <4 x float> %d, %a
146+
ret <4 x float> %res
147+
}

0 commit comments

Comments
 (0)