Skip to content

Commit 4b71e89

Browse files
committed
DAG: Assert fcmp uno runtime calls are boolean values
This saves 2 instructions in the ARM soft-float case for fcmp ueq. This code is written in a confusingly, overly general way. The point of getCmpLibcallCC is to express that the compiler-rt implementations of the FP compares are different aliases around functions which may return -1 in some cases. This does not apply to the call for unordered, which returns a normal boolean. Also stop overriding the default value for the unordered compare for ARM. This was setting it to the same value as the default, which is now assumed.
1 parent eb71fdd commit 4b71e89

File tree

8 files changed

+57
-39
lines changed

8 files changed

+57
-39
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,8 +429,20 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
429429
// Update Chain.
430430
Chain = Call.second;
431431
} else {
432+
assert(CCCode == (ShouldInvertCC ? ISD::SETEQ : ISD::SETNE) &&
433+
"unordered call should be simple boolean");
434+
432435
EVT SetCCVT =
433436
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
437+
if (RetVT == SetCCVT &&
438+
getBooleanContents(RetVT) == ZeroOrOneBooleanContent) {
439+
// FIXME: Checking the type matches is a hack in case the calling
440+
// convention lowering inserted some instructions after the
441+
// CopyFromReg. Combines fail to look through the AssertZext.
442+
NewLHS = DAG.getNode(ISD::AssertZext, dl, RetVT, Call.first,
443+
DAG.getValueType(MVT::i1));
444+
}
445+
434446
SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
435447
auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
436448
CCCode = getCmpLibcallCC(LC2);

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -612,7 +612,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
612612
{ RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
613613
{ RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
614614
{ RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
615-
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
615+
{ RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
616616

617617
// Single-precision floating-point arithmetic helper functions
618618
// RTABI chapter 4.1.2, Table 4
@@ -629,7 +629,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
629629
{ RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
630630
{ RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
631631
{ RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
632-
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
632+
{ RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633633

634634
// Floating-point to integer conversions.
635635
// RTABI chapter 4.1.2, Table 6

llvm/test/CodeGen/ARM/fpcmp_ueq.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@ entry:
99
}
1010

1111
; CHECK-ARMv4-LABEL: f7:
12-
; CHECK-ARMv4-DAG: bl ___eqsf2
13-
; CHECK-ARMv4-DAG: bl ___unordsf2
14-
; CHECK-ARMv4: cmp r0, #0
15-
; CHECK-ARMv4: movne r0, #1
16-
; CHECK-ARMv4: orrs r0, r0,
17-
; CHECK-ARMv4: moveq r0, #42
12+
; CHECK-ARMv4: bl ___eqsf2
13+
; CHECK-ARMv4-NEXT: rsbs r1, r0, #0
14+
; CHECK-ARMv4-NEXT: adc r6, r0, r1
15+
16+
; CHECK-ARMv4: bl ___unordsf2
17+
; CHECK-ARMv4-NEXT: orrs r0, r0, r6
18+
; CHECK-ARMv4-NEXT: mov r0, #154
1819

1920
; CHECK-ARMv7-LABEL: f7:
2021
; CHECK-ARMv7: vcmp.f32

llvm/test/CodeGen/RISCV/double-fcmp-strict.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -471,15 +471,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
471471
; RV32I-NEXT: mv s1, a2
472472
; RV32I-NEXT: mv s2, a1
473473
; RV32I-NEXT: mv s3, a0
474-
; RV32I-NEXT: call __eqdf2
475-
; RV32I-NEXT: seqz s4, a0
474+
; RV32I-NEXT: call __unorddf2
475+
; RV32I-NEXT: mv s4, a0
476476
; RV32I-NEXT: mv a0, s3
477477
; RV32I-NEXT: mv a1, s2
478478
; RV32I-NEXT: mv a2, s1
479479
; RV32I-NEXT: mv a3, s0
480-
; RV32I-NEXT: call __unorddf2
481-
; RV32I-NEXT: snez a0, a0
482-
; RV32I-NEXT: or a0, a0, s4
480+
; RV32I-NEXT: call __eqdf2
481+
; RV32I-NEXT: seqz a0, a0
482+
; RV32I-NEXT: or a0, s4, a0
483483
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
484484
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
485485
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1199,15 +1199,15 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
11991199
; RV32I-NEXT: mv s1, a2
12001200
; RV32I-NEXT: mv s2, a1
12011201
; RV32I-NEXT: mv s3, a0
1202-
; RV32I-NEXT: call __eqdf2
1203-
; RV32I-NEXT: seqz s4, a0
1202+
; RV32I-NEXT: call __unorddf2
1203+
; RV32I-NEXT: mv s4, a0
12041204
; RV32I-NEXT: mv a0, s3
12051205
; RV32I-NEXT: mv a1, s2
12061206
; RV32I-NEXT: mv a2, s1
12071207
; RV32I-NEXT: mv a3, s0
1208-
; RV32I-NEXT: call __unorddf2
1209-
; RV32I-NEXT: snez a0, a0
1210-
; RV32I-NEXT: or a0, a0, s4
1208+
; RV32I-NEXT: call __eqdf2
1209+
; RV32I-NEXT: seqz a0, a0
1210+
; RV32I-NEXT: or a0, s4, a0
12111211
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
12121212
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
12131213
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload

llvm/test/CodeGen/RISCV/double-fcmp.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -403,15 +403,15 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
403403
; RV32I-NEXT: mv s1, a2
404404
; RV32I-NEXT: mv s2, a1
405405
; RV32I-NEXT: mv s3, a0
406-
; RV32I-NEXT: call __eqdf2
407-
; RV32I-NEXT: seqz s4, a0
406+
; RV32I-NEXT: call __unorddf2
407+
; RV32I-NEXT: mv s4, a0
408408
; RV32I-NEXT: mv a0, s3
409409
; RV32I-NEXT: mv a1, s2
410410
; RV32I-NEXT: mv a2, s1
411411
; RV32I-NEXT: mv a3, s0
412-
; RV32I-NEXT: call __unorddf2
413-
; RV32I-NEXT: snez a0, a0
414-
; RV32I-NEXT: or a0, a0, s4
412+
; RV32I-NEXT: call __eqdf2
413+
; RV32I-NEXT: seqz a0, a0
414+
; RV32I-NEXT: or a0, s4, a0
415415
; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
416416
; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
417417
; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload

llvm/test/CodeGen/RISCV/float-fcmp-strict.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -382,13 +382,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp {
382382
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
383383
; RV32I-NEXT: mv s0, a1
384384
; RV32I-NEXT: mv s1, a0
385-
; RV32I-NEXT: call __eqsf2
386-
; RV32I-NEXT: seqz s2, a0
385+
; RV32I-NEXT: call __unordsf2
386+
; RV32I-NEXT: mv s2, a0
387387
; RV32I-NEXT: mv a0, s1
388388
; RV32I-NEXT: mv a1, s0
389-
; RV32I-NEXT: call __unordsf2
390-
; RV32I-NEXT: snez a0, a0
391-
; RV32I-NEXT: or a0, a0, s2
389+
; RV32I-NEXT: call __eqsf2
390+
; RV32I-NEXT: seqz a0, a0
391+
; RV32I-NEXT: or a0, s2, a0
392392
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
393393
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
394394
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
@@ -991,13 +991,13 @@ define i32 @fcmps_ueq(float %a, float %b) nounwind strictfp {
991991
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
992992
; RV32I-NEXT: mv s0, a1
993993
; RV32I-NEXT: mv s1, a0
994-
; RV32I-NEXT: call __eqsf2
995-
; RV32I-NEXT: seqz s2, a0
994+
; RV32I-NEXT: call __unordsf2
995+
; RV32I-NEXT: mv s2, a0
996996
; RV32I-NEXT: mv a0, s1
997997
; RV32I-NEXT: mv a1, s0
998-
; RV32I-NEXT: call __unordsf2
999-
; RV32I-NEXT: snez a0, a0
1000-
; RV32I-NEXT: or a0, a0, s2
998+
; RV32I-NEXT: call __eqsf2
999+
; RV32I-NEXT: seqz a0, a0
1000+
; RV32I-NEXT: or a0, s2, a0
10011001
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
10021002
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
10031003
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload

llvm/test/CodeGen/RISCV/float-fcmp.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -344,13 +344,13 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
344344
; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill
345345
; RV32I-NEXT: mv s0, a1
346346
; RV32I-NEXT: mv s1, a0
347-
; RV32I-NEXT: call __eqsf2
348-
; RV32I-NEXT: seqz s2, a0
347+
; RV32I-NEXT: call __unordsf2
348+
; RV32I-NEXT: mv s2, a0
349349
; RV32I-NEXT: mv a0, s1
350350
; RV32I-NEXT: mv a1, s0
351-
; RV32I-NEXT: call __unordsf2
352-
; RV32I-NEXT: snez a0, a0
353-
; RV32I-NEXT: or a0, a0, s2
351+
; RV32I-NEXT: call __eqsf2
352+
; RV32I-NEXT: seqz a0, a0
353+
; RV32I-NEXT: or a0, s2, a0
354354
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
355355
; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
356356
; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload

llvm/test/CodeGen/Thumb2/float-cmp.ll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,13 @@ define i1 @cmp_d_one(double %a, double %b) {
200200
; CHECK-LABEL: cmp_d_one:
201201
; NONE: bl __aeabi_dcmpeq
202202
; NONE: bl __aeabi_dcmpun
203-
; SP: bl __aeabi_dcmpeq
204203
; SP: bl __aeabi_dcmpun
204+
; SP: eor r8, r0, #1
205+
; SP: bl __aeabi_dcmpeq
206+
; SP-NEXT: clz r0, r0
207+
; SP-NEXT: lsrs r0, r0, #5
208+
; SP-NEXT: ands.w r0, r0, r8
209+
205210
; DP: vcmp.f64
206211
; DP: movmi r0, #1
207212
; DP: movgt r0, #1

0 commit comments

Comments
 (0)