Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 3d22b2c

Browse files
committed
Merging r324581:
------------------------------------------------------------------------
r324581 | sjoerdmeijer | 2018-02-08 00:39:05 -0800 (Thu, 08 Feb 2018) | 12 lines

[AArch64] Don't materialize 0 with "fmov h0, .." when FullFP16 is not supported

We were generating "fmov h0, wzr" instructions when FullFP16 is not enabled.

I've not added any tests, because the problem was visible in: test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll, which I had to change: I don't think Cyclone has FullFP16 enabled by default, so it shouldn't be using this v8.2a instruction. I've also removed these rdar tags, please shout if there are any objections.

Differential Revision: https://reviews.llvm.org/D43020
------------------------------------------------------------------------

git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@332655 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a431507 commit 3d22b2c

File tree

3 files changed

+11
-7
lines changed

3 files changed

+11
-7
lines changed

lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4930,7 +4930,8 @@ bool AArch64TargetLowering::isOffsetFoldingLegal(
49304930
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
49314931
// We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
49324932
// FIXME: We should be able to handle f128 as well with a clever lowering.
4933-
if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
4933+
if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
4934+
(VT == MVT::f16 && Subtarget->hasFullFP16()))) {
49344935
DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
49354936
return true;
49364937
}

lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2713,7 +2713,7 @@ defm FMOV : UnscaledConversion<"fmov">;
27132713
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
27142714
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
27152715
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
2716-
Sched<[WriteF]>;
2716+
Sched<[WriteF]>, Requires<[HasFullFP16]>;
27172717
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
27182718
Sched<[WriteF]>;
27192719
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,

test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,10 @@
11
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone < %s | FileCheck %s -check-prefixes=ALL,CYCLONE
2+
; RUN: llc -mtriple=arm64-apple-ios -mcpu=cyclone -mattr=+fullfp16 < %s | FileCheck %s -check-prefixes=CYCLONE-FULLFP16
23
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m1 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
34
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=exynos-m3 < %s | FileCheck %s -check-prefixes=ALL,OTHERS
45
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=kryo < %s | FileCheck %s -check-prefixes=ALL,OTHERS
56
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=falkor < %s | FileCheck %s -check-prefixes=ALL,OTHERS
67

7-
; rdar://11481771
8-
; rdar://13713797
9-
108
declare void @bar(half, float, double, <2 x double>)
119
declare void @bari(i32, i32)
1210
declare void @barl(i64, i64)
@@ -16,11 +14,14 @@ define void @t1() nounwind ssp {
1614
entry:
1715
; ALL-LABEL: t1:
1816
; ALL-NOT: fmov
19-
; CYCLONE: fmov h0, wzr
17+
; ALL: ldr h0,{{.*}}
2018
; CYCLONE: fmov s1, wzr
2119
; CYCLONE: fmov d2, xzr
2220
; CYCLONE: movi.16b v3, #0
23-
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
21+
; CYCLONE-FULLFP16: fmov h0, wzr
22+
; CYCLONE-FULLFP16: fmov s1, wzr
23+
; CYCLONE-FULLFP16: fmov d2, xzr
24+
; CYCLONE-FULLFP16: movi.16b v3, #0
2425
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
2526
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
2627
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
@@ -53,6 +54,8 @@ define void @t4() nounwind ssp {
5354
; ALL-NOT: fmov
5455
; CYCLONE: fmov s{{[0-3]+}}, wzr
5556
; CYCLONE: fmov s{{[0-3]+}}, wzr
57+
; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
58+
; CYCLONE-FULLFP16: fmov s{{[0-3]+}}, wzr
5659
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
5760
; OTHERS: movi v{{[0-3]+}}.2d, #0000000000000000
5861
tail call void @barf(float 0.000000e+00, float 0.000000e+00) nounwind

0 commit comments

Comments
 (0)