diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 13315fed7ed2a..ebe65bf1a64ab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11545,22 +11545,49 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, // If the integer bounds are exactly representable as floats and min/max are // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence // of comparisons and selects. - bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) && - isOperationLegal(ISD::FMAXNUM, SrcVT); - if (AreExactFloatBounds && MinMaxLegal) { + bool MinMax2019NumLegal = isOperationLegal(ISD::FMINIMUMNUM, SrcVT) && + isOperationLegal(ISD::FMAXIMUMNUM, SrcVT); + bool MinMax2019Legal = isOperationLegal(ISD::FMINIMUM, SrcVT) && + isOperationLegal(ISD::FMAXIMUM, SrcVT); + bool MinMax2008Legal = isOperationLegal(ISD::FMINNUM, SrcVT) && + isOperationLegal(ISD::FMAXNUM, SrcVT); + + if (AreExactFloatBounds && + (MinMax2019NumLegal || MinMax2019Legal || MinMax2008Legal)) { SDValue Clamped = Src; - - // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. - Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode); - // Clamp by MaxFloat from above. NaN cannot occur. - Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode); + bool Use2019 = false; + + if (MinMax2019NumLegal) { + // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. + Clamped = DAG.getNode(ISD::FMAXIMUMNUM, dl, SrcVT, Clamped, MinFloatNode); + // Clamp by MaxFloat from above. NaN cannot occur. + Clamped = DAG.getNode(ISD::FMINIMUMNUM, dl, SrcVT, Clamped, MaxFloatNode); + } else if (MinMax2008Legal) { + // Try 2008 first as it has better performance for converting SNaN to + // unsigned. + if (!IsSigned && !DAG.isKnownNeverSNaN(Clamped)) { + Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, Clamped); + } + // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. + Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode); + // Clamp by MaxFloat from above. NaN cannot occur. + Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode); + } else if (MinMax2019Legal) { + // Clamp Src by MinFloat from below. If Src is NaN the result is qNaN. + Clamped = DAG.getNode(ISD::FMAXIMUM, dl, SrcVT, Clamped, MinFloatNode); + // Clamp by MaxFloat from above. NaN may occur. + Clamped = DAG.getNode(ISD::FMINIMUM, dl, SrcVT, Clamped, MaxFloatNode); + Use2019 = true; + } else { + llvm_unreachable("No Min/Max supported?"); + } // Convert clamped value to integer. SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Clamped); // In the unsigned case we're done, because we mapped NaN to MinFloat, // which will cast to zero. - if (!IsSigned) + if ((!IsSigned && !Use2019) || DAG.isKnownNeverNaN(Src)) return FpToInt; // Otherwise, select 0 if Src is NaN. diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll index 5179f97624489..bccb0ad150509 100644 --- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll @@ -67,23 +67,22 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i1_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s2, r0 ; VFP2-NEXT: vmov.f32 s0, #-1.000000e+00 -; VFP2-NEXT: vcvt.s32.f32 s4, s2 -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r0, #-1 -; VFP2-NEXT: vcmp.f32 s2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #0 +; VFP2-NEXT: vldr s4, .LCPI0_0 +; VFP2-NEXT: vmov s2, r0 ; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI0_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 ; ; FP16-LABEL: test_signed_i1_f32: ; FP16: @ %bb.0: @@ -157,21 +156,15 @@ define i8 @test_signed_i8_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i8_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI1_0 -; VFP2-NEXT: vldr s6, .LCPI1_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: it lt -; VFP2-NEXT: mvnlt r0, #127 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #127 -; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI1_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI1_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr @@ -256,22 +249,15 @@ define i13 @test_signed_i13_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i13_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI2_0 -; VFP2-NEXT: vldr s6, .LCPI2_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: itt lt -; VFP2-NEXT: movwlt r0, #61440 -; VFP2-NEXT: movtlt r0, #65535 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #4095 -; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI2_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI2_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr @@ -356,22 +342,15 @@ define i16 @test_signed_i16_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i16_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI3_0 -; VFP2-NEXT: vldr s6, .LCPI3_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: itt lt -; VFP2-NEXT: movwlt r0, #32768 -; VFP2-NEXT: movtlt r0, #65535 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #32767 -; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI3_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI3_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr @@ -456,23 +435,15 @@ define i19 @test_signed_i19_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i19_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI4_0 -; VFP2-NEXT: vldr s6, .LCPI4_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: itt lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: movtlt r0, #65532 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s0 -; VFP2-NEXT: itt gt -; VFP2-NEXT: movwgt r0, #65535 -; VFP2-NEXT: movtgt r0, #3 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI4_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI4_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr @@ -2492,23 +2463,22 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s2, r0 ; VFP2-NEXT: vmov.f32 s0, #-1.000000e+00 -; VFP2-NEXT: vcvt.s32.f32 s4, s2 -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r0, #-1 -; VFP2-NEXT: vcmp.f32 s2, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #0 +; VFP2-NEXT: vldr s4, .LCPI20_0 +; VFP2-NEXT: vmov s2, r0 ; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI20_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 ; ; FP16-LABEL: test_signed_i1_f16: ; FP16: @ %bb.0: @@ -2588,21 +2558,15 @@ define i8 @test_signed_i8_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI21_0 -; VFP2-NEXT: vldr s6, .LCPI21_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: it lt -; VFP2-NEXT: mvnlt r0, #127 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #127 -; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI21_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI21_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} @@ -2693,22 +2657,15 @@ define i13 @test_signed_i13_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI22_0 -; VFP2-NEXT: vldr s6, .LCPI22_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: itt lt -; VFP2-NEXT: movwlt r0, #61440 -; VFP2-NEXT: movtlt r0, #65535 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #4095 -; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI22_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI22_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} @@ -2799,22 +2756,15 @@ define i16 @test_signed_i16_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI23_0 -; VFP2-NEXT: vldr s6, .LCPI23_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: itt lt -; VFP2-NEXT: movwlt r0, #32768 -; VFP2-NEXT: movtlt r0, #65535 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #32767 -; VFP2-NEXT: vcmp.f32 s0, s0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI23_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI23_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} @@ -2905,23 +2855,15 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s2, .LCPI24_0 -; VFP2-NEXT: vldr s6, .LCPI24_1 -; VFP2-NEXT: vcvt.s32.f32 s4, s0 -; VFP2-NEXT: vcmp.f32 s0, s2 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s6 -; VFP2-NEXT: vmov r0, s4 -; VFP2-NEXT: itt lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: movtlt r0, #65532 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s0 -; VFP2-NEXT: itt gt -; VFP2-NEXT: movwgt r0, #65535 -; VFP2-NEXT: movtgt r0, #3 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI24_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI24_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll index 4cc5f943dadff..fdf2348ee416d 100644 --- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll @@ -49,27 +49,31 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i1_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vmov.f32 s4, #1.000000e+00 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 +; VFP2-NEXT: vldr s2, .LCPI0_0 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vmax.f32 d16, d0, d1 +; VFP2-NEXT: vcmp.f32 s0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 +; VFP2-NEXT: vmin.f32 d1, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s2, s2 ; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #1 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI0_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 ; ; FP16-LABEL: test_signed_i1_f32: ; FP16: @ %bb.0: -; FP16-NEXT: vldr s0, .LCPI0_0 -; FP16-NEXT: vmov s2, r0 -; FP16-NEXT: vmov.f32 s4, #1.000000e+00 -; FP16-NEXT: vmaxnm.f32 s0, s2, s0 -; FP16-NEXT: vminnm.f32 s0, s0, s4 +; FP16-NEXT: vmov s4, r0 +; FP16-NEXT: vldr s2, .LCPI0_0 +; FP16-NEXT: vmaxnm.f32 s4, s4, s4 +; FP16-NEXT: vmov.f32 s0, #1.000000e+00 +; FP16-NEXT: vmaxnm.f32 s2, s4, s2 +; FP16-NEXT: vminnm.f32 s0, s2, s0 ; FP16-NEXT: vcvt.u32.f32 s0, s0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr @@ -115,40 +119,42 @@ define i8 @test_signed_i8_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i8_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI1_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI1_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI1_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #255 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI1_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI1_1: ; VFP2-NEXT: .long 0x437f0000 @ float 255 ; ; FP16-LABEL: test_signed_i8_f32: ; FP16: @ %bb.0: +; FP16-NEXT: vmov s4, r0 +; FP16-NEXT: vldr s2, .LCPI1_1 +; FP16-NEXT: vmaxnm.f32 s4, s4, s4 ; FP16-NEXT: vldr s0, .LCPI1_0 -; FP16-NEXT: vmov s2, r0 -; FP16-NEXT: vldr s4, .LCPI1_1 -; FP16-NEXT: vmaxnm.f32 s0, s2, s0 -; FP16-NEXT: vminnm.f32 s0, s0, s4 +; FP16-NEXT: vmaxnm.f32 s2, s4, s2 +; FP16-NEXT: vminnm.f32 s0, s2, s0 ; FP16-NEXT: vcvt.u32.f32 s0, s0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 2 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI1_0: -; FP16-NEXT: .long 0x00000000 @ float 0 -; FP16-NEXT: .LCPI1_1: ; FP16-NEXT: .long 0x437f0000 @ float 255 +; FP16-NEXT: .LCPI1_1: +; FP16-NEXT: .long 0x00000000 @ float 0 %x = call i8 @llvm.fptoui.sat.i8.f32(float %f) ret i8 %x } @@ -189,40 +195,42 @@ define i13 @test_signed_i13_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i13_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI2_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI2_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI2_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #8191 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI2_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI2_1: ; VFP2-NEXT: .long 0x45fff800 @ float 8191 ; ; FP16-LABEL: test_signed_i13_f32: ; FP16: @ %bb.0: +; FP16-NEXT: vmov s4, r0 +; FP16-NEXT: vldr s2, .LCPI2_1 +; FP16-NEXT: vmaxnm.f32 s4, s4, s4 ; FP16-NEXT: vldr s0, .LCPI2_0 -; FP16-NEXT: vmov s2, r0 -; FP16-NEXT: vldr s4, .LCPI2_1 -; FP16-NEXT: vmaxnm.f32 s0, s2, s0 -; FP16-NEXT: vminnm.f32 s0, s0, s4 +; FP16-NEXT: vmaxnm.f32 s2, s4, s2 +; FP16-NEXT: vminnm.f32 s0, s2, s0 ; FP16-NEXT: vcvt.u32.f32 s0, s0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 2 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI2_0: -; FP16-NEXT: .long 0x00000000 @ float 0 -; FP16-NEXT: .LCPI2_1: ; FP16-NEXT: .long 0x45fff800 @ float 8191 +; FP16-NEXT: .LCPI2_1: +; FP16-NEXT: .long 0x00000000 @ float 0 %x = call i13 @llvm.fptoui.sat.i13.f32(float %f) ret i13 %x } @@ -263,40 +271,42 @@ define i16 @test_signed_i16_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i16_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI3_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI3_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI3_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #65535 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI3_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI3_1: ; VFP2-NEXT: .long 0x477fff00 @ float 65535 ; ; FP16-LABEL: test_signed_i16_f32: ; FP16: @ %bb.0: +; FP16-NEXT: vmov s4, r0 +; FP16-NEXT: vldr s2, .LCPI3_1 +; FP16-NEXT: vmaxnm.f32 s4, s4, s4 ; FP16-NEXT: vldr s0, .LCPI3_0 -; FP16-NEXT: vmov s2, r0 -; FP16-NEXT: vldr s4, .LCPI3_1 -; FP16-NEXT: vmaxnm.f32 s0, s2, s0 -; FP16-NEXT: vminnm.f32 s0, s0, s4 +; FP16-NEXT: vmaxnm.f32 s2, s4, s2 +; FP16-NEXT: vminnm.f32 s0, s2, s0 ; FP16-NEXT: vcvt.u32.f32 s0, s0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 2 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI3_0: -; FP16-NEXT: .long 0x00000000 @ float 0 -; FP16-NEXT: .LCPI3_1: ; FP16-NEXT: .long 0x477fff00 @ float 65535 +; FP16-NEXT: .LCPI3_1: +; FP16-NEXT: .long 0x00000000 @ float 0 %x = call i16 @llvm.fptoui.sat.i16.f32(float %f) ret i16 %x } @@ -337,41 +347,42 @@ define i19 @test_signed_i19_f32(float %f) nounwind { ; ; VFP2-LABEL: test_signed_i19_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI4_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI4_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI4_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itt gt -; VFP2-NEXT: movwgt r0, #65535 -; VFP2-NEXT: movtgt r0, #7 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: bx lr ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI4_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI4_1: ; VFP2-NEXT: .long 0x48ffffe0 @ float 524287 ; ; FP16-LABEL: test_signed_i19_f32: ; FP16: @ %bb.0: +; FP16-NEXT: vmov s4, r0 +; FP16-NEXT: vldr s2, .LCPI4_1 +; FP16-NEXT: vmaxnm.f32 s4, s4, s4 ; FP16-NEXT: vldr s0, .LCPI4_0 -; FP16-NEXT: vmov s2, r0 -; FP16-NEXT: vldr s4, .LCPI4_1 -; FP16-NEXT: vmaxnm.f32 s0, s2, s0 -; FP16-NEXT: vminnm.f32 s0, s0, s4 +; FP16-NEXT: vmaxnm.f32 s2, s4, s2 +; FP16-NEXT: vminnm.f32 s0, s2, s0 ; FP16-NEXT: vcvt.u32.f32 s0, s0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 2 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI4_0: -; FP16-NEXT: .long 0x00000000 @ float 0 -; FP16-NEXT: .LCPI4_1: ; FP16-NEXT: .long 0x48ffffe0 @ float 524287 +; FP16-NEXT: .LCPI4_1: +; FP16-NEXT: .long 0x00000000 @ float 0 %x = call i19 @llvm.fptoui.sat.i19.f32(float %f) ret i19 %x } @@ -873,8 +884,9 @@ define i1 @test_signed_i1_f64(double %f) nounwind { ; FP16-LABEL: test_signed_i1_f64: ; FP16: @ %bb.0: ; FP16-NEXT: vmov.f64 d0, #1.000000e+00 -; FP16-NEXT: vldr d1, .LCPI10_0 ; FP16-NEXT: vmov d2, r0, r1 +; FP16-NEXT: vldr d1, .LCPI10_0 +; FP16-NEXT: vmaxnm.f64 d2, d2, d2 ; FP16-NEXT: vmaxnm.f64 d1, d2, d1 ; FP16-NEXT: vminnm.f64 d0, d1, d0 ; FP16-NEXT: vcvt.u32.f64 s0, d0 @@ -955,22 +967,23 @@ define i8 @test_signed_i8_f64(double %f) nounwind { ; ; FP16-LABEL: test_signed_i8_f64: ; FP16: @ %bb.0: +; FP16-NEXT: vmov d2, r0, r1 +; FP16-NEXT: vldr d1, .LCPI11_1 +; FP16-NEXT: vmaxnm.f64 d2, d2, d2 ; FP16-NEXT: vldr d0, .LCPI11_0 -; FP16-NEXT: vmov d1, r0, r1 -; FP16-NEXT: vldr d2, .LCPI11_1 -; FP16-NEXT: vmaxnm.f64 d0, d1, d0 -; FP16-NEXT: vminnm.f64 d0, d0, d2 +; FP16-NEXT: vmaxnm.f64 d1, d2, d1 +; FP16-NEXT: vminnm.f64 d0, d1, d0 ; FP16-NEXT: vcvt.u32.f64 s0, d0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 3 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI11_0: -; FP16-NEXT: .long 0 @ double 0 -; FP16-NEXT: .long 0 -; FP16-NEXT: .LCPI11_1: ; FP16-NEXT: .long 0 @ double 255 ; FP16-NEXT: .long 1081073664 +; FP16-NEXT: .LCPI11_1: +; FP16-NEXT: .long 0 @ double 0 +; FP16-NEXT: .long 0 %x = call i8 @llvm.fptoui.sat.i8.f64(double %f) ret i8 %x } @@ -1043,22 +1056,23 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; ; FP16-LABEL: test_signed_i13_f64: ; FP16: @ %bb.0: +; FP16-NEXT: vmov d2, r0, r1 +; FP16-NEXT: vldr d1, .LCPI12_1 +; FP16-NEXT: vmaxnm.f64 d2, d2, d2 ; FP16-NEXT: vldr d0, .LCPI12_0 -; FP16-NEXT: vmov d1, r0, r1 -; FP16-NEXT: vldr d2, .LCPI12_1 -; FP16-NEXT: vmaxnm.f64 d0, d1, d0 -; FP16-NEXT: vminnm.f64 d0, d0, d2 +; FP16-NEXT: vmaxnm.f64 d1, d2, d1 +; FP16-NEXT: vminnm.f64 d0, d1, d0 ; FP16-NEXT: vcvt.u32.f64 s0, d0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 3 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI12_0: -; FP16-NEXT: .long 0 @ double 0 -; FP16-NEXT: .long 0 -; FP16-NEXT: .LCPI12_1: ; FP16-NEXT: .long 0 @ double 8191 ; FP16-NEXT: .long 1086324480 +; FP16-NEXT: .LCPI12_1: +; FP16-NEXT: .long 0 @ double 0 +; FP16-NEXT: .long 0 %x = call i13 @llvm.fptoui.sat.i13.f64(double %f) ret i13 %x } @@ -1131,22 +1145,23 @@ define i16 @test_signed_i16_f64(double %f) nounwind { ; ; FP16-LABEL: test_signed_i16_f64: ; FP16: @ %bb.0: +; FP16-NEXT: vmov d2, r0, r1 +; FP16-NEXT: vldr d1, .LCPI13_1 +; FP16-NEXT: vmaxnm.f64 d2, d2, d2 ; FP16-NEXT: vldr d0, .LCPI13_0 -; FP16-NEXT: vmov d1, r0, r1 -; FP16-NEXT: vldr d2, .LCPI13_1 -; FP16-NEXT: vmaxnm.f64 d0, d1, d0 -; FP16-NEXT: vminnm.f64 d0, d0, d2 +; FP16-NEXT: vmaxnm.f64 d1, d2, d1 +; FP16-NEXT: vminnm.f64 d0, d1, d0 ; FP16-NEXT: vcvt.u32.f64 s0, d0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 3 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI13_0: -; FP16-NEXT: .long 0 @ double 0 -; FP16-NEXT: .long 0 -; FP16-NEXT: .LCPI13_1: ; FP16-NEXT: .long 0 @ double 65535 ; FP16-NEXT: .long 1089470432 +; FP16-NEXT: .LCPI13_1: +; FP16-NEXT: .long 0 @ double 0 +; FP16-NEXT: .long 0 %x = call i16 @llvm.fptoui.sat.i16.f64(double %f) ret i16 %x } @@ -1220,22 +1235,23 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; ; FP16-LABEL: test_signed_i19_f64: ; FP16: @ %bb.0: +; FP16-NEXT: vmov d2, r0, r1 +; FP16-NEXT: vldr d1, .LCPI14_1 +; FP16-NEXT: vmaxnm.f64 d2, d2, d2 ; FP16-NEXT: vldr d0, .LCPI14_0 -; FP16-NEXT: vmov d1, r0, r1 -; FP16-NEXT: vldr d2, .LCPI14_1 -; FP16-NEXT: vmaxnm.f64 d0, d1, d0 -; FP16-NEXT: vminnm.f64 d0, d0, d2 +; FP16-NEXT: vmaxnm.f64 d1, d2, d1 +; FP16-NEXT: vminnm.f64 d0, d1, d0 ; FP16-NEXT: vcvt.u32.f64 s0, d0 ; FP16-NEXT: vmov r0, s0 ; FP16-NEXT: bx lr ; FP16-NEXT: .p2align 3 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI14_0: -; FP16-NEXT: .long 0 @ double 0 -; FP16-NEXT: .long 0 -; FP16-NEXT: .LCPI14_1: ; FP16-NEXT: .long 0 @ double 524287 ; FP16-NEXT: .long 1092616188 +; FP16-NEXT: .LCPI14_1: +; FP16-NEXT: .long 0 @ double 0 +; FP16-NEXT: .long 0 %x = call i19 @llvm.fptoui.sat.i19.f64(double %f) ret i19 %x } @@ -1393,22 +1409,23 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; FP16: @ %bb.0: ; FP16-NEXT: .save {r7, lr} ; FP16-NEXT: push {r7, lr} +; FP16-NEXT: vmov d2, r0, r1 +; FP16-NEXT: vldr d1, .LCPI16_1 +; FP16-NEXT: vmaxnm.f64 d2, d2, d2 ; FP16-NEXT: vldr d0, .LCPI16_0 -; FP16-NEXT: vmov d1, r0, r1 -; FP16-NEXT: vldr d2, .LCPI16_1 -; FP16-NEXT: vmaxnm.f64 d0, d1, d0 -; FP16-NEXT: vminnm.f64 d0, d0, d2 +; FP16-NEXT: vmaxnm.f64 d1, d2, d1 +; FP16-NEXT: vminnm.f64 d0, d1, d0 ; FP16-NEXT: vmov r0, r1, d0 ; FP16-NEXT: bl __aeabi_d2ulz ; FP16-NEXT: pop {r7, pc} ; FP16-NEXT: .p2align 3 ; FP16-NEXT: @ %bb.1: ; FP16-NEXT: .LCPI16_0: -; FP16-NEXT: .long 0 @ double 0 -; FP16-NEXT: .long 0 -; FP16-NEXT: .LCPI16_1: ; FP16-NEXT: .long 4294967288 @ double 1125899906842623 ; FP16-NEXT: .long 1125122047 +; FP16-NEXT: .LCPI16_1: +; FP16-NEXT: .long 0 @ double 0 +; FP16-NEXT: .long 0 %x = call i50 @llvm.fptoui.sat.i50.f64(double %f) ret i50 %x } @@ -1864,19 +1881,22 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 ; VFP2-NEXT: vmov.f32 s4, #1.000000e+00 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 +; VFP2-NEXT: vldr s2, .LCPI20_0 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vmax.f32 d16, d0, d1 +; VFP2-NEXT: vcmp.f32 s0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 +; VFP2-NEXT: vmin.f32 d1, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s2, s2 ; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #1 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} +; VFP2-NEXT: .p2align 2 +; VFP2-NEXT: @ %bb.1: +; VFP2-NEXT: .LCPI20_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 ; ; FP16-LABEL: test_signed_i1_f16: ; FP16: @ %bb.0: @@ -1936,22 +1956,23 @@ define i8 @test_signed_i8_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI21_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI21_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI21_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r0, #255 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI21_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI21_1: ; VFP2-NEXT: .long 0x437f0000 @ float 255 ; ; FP16-LABEL: test_signed_i8_f16: @@ -2016,22 +2037,23 @@ define i13 @test_signed_i13_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI22_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI22_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI22_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #8191 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI22_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI22_1: ; VFP2-NEXT: .long 0x45fff800 @ float 8191 ; ; FP16-LABEL: test_signed_i13_f16: @@ -2096,22 +2118,23 @@ define i16 @test_signed_i16_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI23_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI23_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI23_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: it gt -; VFP2-NEXT: movwgt r0, #65535 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI23_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI23_1: ; VFP2-NEXT: .long 0x477fff00 @ float 65535 ; ; FP16-LABEL: test_signed_i16_f16: @@ -2176,23 +2199,23 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s0, r0 -; VFP2-NEXT: vldr s4, .LCPI24_0 -; VFP2-NEXT: vcvt.u32.f32 s2, s0 -; VFP2-NEXT: vcmp.f32 s0, #0 -; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s0, s4 -; VFP2-NEXT: vmov r0, s2 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r0, #0 +; VFP2-NEXT: vmov s2, r0 +; VFP2-NEXT: vldr s0, .LCPI24_0 +; VFP2-NEXT: vmax.f32 d16, d1, d0 +; VFP2-NEXT: vldr s4, .LCPI24_1 +; VFP2-NEXT: vcmp.f32 s2, s2 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: itt gt -; VFP2-NEXT: movwgt r0, #65535 -; VFP2-NEXT: movtgt r0, #7 +; VFP2-NEXT: vmin.f32 d0, d16, d2 +; VFP2-NEXT: vcvt.u32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 +; VFP2-NEXT: it vs +; VFP2-NEXT: movvs r0, #0 ; VFP2-NEXT: pop {r7, pc} ; VFP2-NEXT: .p2align 2 ; VFP2-NEXT: @ %bb.1: ; VFP2-NEXT: .LCPI24_0: +; VFP2-NEXT: .long 0x00000000 @ float 0 +; VFP2-NEXT: .LCPI24_1: ; VFP2-NEXT: .long 0x48ffffe0 @ float 524287 ; ; FP16-LABEL: test_signed_i19_f16: diff --git a/llvm/test/CodeGen/Mips/Half2Int16.ll b/llvm/test/CodeGen/Mips/Half2Int16.ll new file mode 100644 index 0000000000000..9ef54b516754f --- /dev/null +++ b/llvm/test/CodeGen/Mips/Half2Int16.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=mipsisa32r6 -mattr=-soft-float | FileCheck %s + +define i16 @fcvt_h_s_sat(float %a) { +; CHECK-LABEL: fcvt_h_s_sat: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: lui $1, %hi($CPI0_0) +; CHECK-NEXT: lwc1 $f0, %lo($CPI0_0)($1) +; CHECK-NEXT: max.s $f0, $f12, $f0 +; CHECK-NEXT: lui $1, %hi($CPI0_1) +; CHECK-NEXT: lwc1 $f1, %lo($CPI0_1)($1) +; CHECK-NEXT: min.s $f0, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: mfc1 $1, $f0 +; CHECK-NEXT: cmp.un.s $f0, $f12, $f12 +; CHECK-NEXT: mfc1 $2, $f0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: seleqz $2, $1, $2 +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_hu_s_sat(float %a) { +; CHECK-LABEL: fcvt_hu_s_sat: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: max.s $f0, $f12, $f12 +; CHECK-NEXT: mtc1 $zero, $f1 +; CHECK-NEXT: max.s $f0, $f0, $f1 +; CHECK-NEXT: lui $1, %hi($CPI1_0) +; CHECK-NEXT: lwc1 $f1, %lo($CPI1_0)($1) +; CHECK-NEXT: min.s $f0, $f0, $f1 +; CHECK-NEXT: lui $1, %hi($CPI1_1) +; CHECK-NEXT: lwc1 $f1, %lo($CPI1_1)($1) +; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f3, $f0 +; CHECK-NEXT: mfc1 $1, $f3 +; CHECK-NEXT: mfc1 $2, $f2 +; CHECK-NEXT: selnez $1, $1, $2 +; CHECK-NEXT: sub.s $f0, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: mfc1 $3, $f0 +; CHECK-NEXT: lui $4, 32768 +; CHECK-NEXT: xor $3, $3, $4 +; CHECK-NEXT: seleqz $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_h_s_sat_nnan(float nofpclass(nan) %a) { +; CHECK-LABEL: fcvt_h_s_sat_nnan: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: lui $1, %hi($CPI2_0) +; CHECK-NEXT: lwc1 $f0, %lo($CPI2_0)($1) +; CHECK-NEXT: max.s $f0, $f12, $f0 +; CHECK-NEXT: lui $1, %hi($CPI2_1) +; CHECK-NEXT: lwc1 $f1, %lo($CPI2_1)($1) +; CHECK-NEXT: min.s $f0, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: mfc1 $2, $f0 +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_hu_s_sat_nnan(float nofpclass(nan) %a) { +; CHECK-LABEL: fcvt_hu_s_sat_nnan: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: mtc1 $zero, $f0 +; CHECK-NEXT: max.s $f0, $f12, $f0 +; CHECK-NEXT: lui $1, %hi($CPI3_0) +; CHECK-NEXT: lwc1 $f1, %lo($CPI3_0)($1) +; CHECK-NEXT: min.s $f0, $f0, $f1 +; CHECK-NEXT: lui $1, %hi($CPI3_1) +; CHECK-NEXT: lwc1 $f1, %lo($CPI3_1)($1) +; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f3, $f0 +; CHECK-NEXT: mfc1 $1, $f3 +; CHECK-NEXT: mfc1 $2, $f2 +; CHECK-NEXT: selnez $1, $1, $2 +; CHECK-NEXT: sub.s $f0, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: mfc1 $3, $f0 +; CHECK-NEXT: lui $4, 32768 +; CHECK-NEXT: xor $3, $3, $4 +; CHECK-NEXT: seleqz $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_h_s_sat_nsnan(float nofpclass(snan) %a) { +; CHECK-LABEL: fcvt_h_s_sat_nsnan: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: lui $1, %hi($CPI4_0) +; CHECK-NEXT: lwc1 $f0, %lo($CPI4_0)($1) +; CHECK-NEXT: max.s $f0, $f12, $f0 +; CHECK-NEXT: lui $1, %hi($CPI4_1) +; CHECK-NEXT: lwc1 $f1, %lo($CPI4_1)($1) +; CHECK-NEXT: min.s $f0, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: mfc1 $1, $f0 +; CHECK-NEXT: cmp.un.s $f0, $f12, $f12 +; CHECK-NEXT: mfc1 $2, $f0 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: seleqz $2, $1, $2 +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_hu_s_sat_nsnan(float nofpclass(snan) %a) { +; CHECK-LABEL: fcvt_hu_s_sat_nsnan: +; CHECK: # %bb.0: # %start +; CHECK-NEXT: mtc1 $zero, $f0 +; CHECK-NEXT: max.s $f0, $f12, $f0 +; CHECK-NEXT: lui $1, %hi($CPI5_0) +; CHECK-NEXT: lwc1 $f1, %lo($CPI5_0)($1) +; CHECK-NEXT: min.s $f0, $f0, $f1 +; CHECK-NEXT: lui $1, %hi($CPI5_1) +; CHECK-NEXT: lwc1 $f1, %lo($CPI5_1)($1) +; CHECK-NEXT: cmp.lt.s $f2, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f3, $f0 +; CHECK-NEXT: mfc1 $1, $f3 +; CHECK-NEXT: mfc1 $2, $f2 +; CHECK-NEXT: selnez $1, $1, $2 +; CHECK-NEXT: sub.s $f0, $f0, $f1 +; CHECK-NEXT: trunc.w.s $f0, $f0 +; CHECK-NEXT: mfc1 $3, $f0 +; CHECK-NEXT: lui $4, 32768 +; CHECK-NEXT: xor $3, $3, $4 +; CHECK-NEXT: seleqz $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a) + ret i16 %0 +} + diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index ee040feca4240..6ab88c3b4a88c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -1190,19 +1190,23 @@ define arm_aapcs_vfpcc <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr s4, .LCPI22_0 +; CHECK-NEXT: vmaxnm.f32 s0, s0, s0 ; CHECK-NEXT: vmov.f32 s6, #1.000000e+00 -; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: vmaxnm.f32 s8, s3, s3 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-NEXT: vmaxnm.f32 s8, s3, s4 +; CHECK-NEXT: vmaxnm.f32 s2, s2, s2 ; CHECK-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-NEXT: vmaxnm.f32 s2, s2, s4 +; CHECK-NEXT: vmaxnm.f32 s10, s1, s1 ; CHECK-NEXT: vcvt.u32.f32 s0, s0 -; CHECK-NEXT: vmaxnm.f32 s4, s1, s4 +; CHECK-NEXT: vmaxnm.f32 s8, s8, s4 +; CHECK-NEXT: vmaxnm.f32 s2, s2, s4 +; CHECK-NEXT: vmaxnm.f32 s4, s10, s4 ; CHECK-NEXT: vminnm.f32 s4, s4, s6 -; CHECK-NEXT: vminnm.f32 s2, s2, s6 +; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: vcvt.u32.f32 s4, s4 -; CHECK-NEXT: vminnm.f32 s8, s8, s6 +; CHECK-NEXT: vminnm.f32 s2, s2, s6 ; CHECK-NEXT: vcvt.u32.f32 s2, s2 +; CHECK-NEXT: vminnm.f32 s8, s8, s6 ; CHECK-NEXT: vcvt.u32.f32 s8, s8 ; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: and r2, r2, #1 @@ -1233,16 +1237,20 @@ define arm_aapcs_vfpcc <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i8: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vldr s4, .LCPI23_0 ; CHECK-MVE-NEXT: vldr s6, .LCPI23_1 -; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 -; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 -; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0 +; CHECK-MVE-NEXT: vldr s4, .LCPI23_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6 +; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6 +; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8 @@ -1257,9 +1265,9 @@ define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-MVE-NEXT: .p2align 2 ; CHECK-MVE-NEXT: @ %bb.1: ; CHECK-MVE-NEXT: .LCPI23_0: -; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 -; CHECK-MVE-NEXT: .LCPI23_1: ; CHECK-MVE-NEXT: .long 0x437f0000 @ float 255 +; CHECK-MVE-NEXT: .LCPI23_1: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 ; ; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i8: ; CHECK-MVEFP: @ %bb.0: @@ -1274,16 +1282,20 @@ define arm_aapcs_vfpcc <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i13: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vldr s4, .LCPI24_0 ; CHECK-MVE-NEXT: vldr s6, .LCPI24_1 -; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 -; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 -; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0 +; CHECK-MVE-NEXT: vldr s4, .LCPI24_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6 +; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6 +; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8 @@ -1298,9 +1310,9 @@ define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-MVE-NEXT: .p2align 2 ; CHECK-MVE-NEXT: @ %bb.1: ; CHECK-MVE-NEXT: .LCPI24_0: -; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 -; CHECK-MVE-NEXT: .LCPI24_1: ; CHECK-MVE-NEXT: .long 0x45fff800 @ float 8191 +; CHECK-MVE-NEXT: .LCPI24_1: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 ; ; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i13: ; CHECK-MVEFP: @ %bb.0: @@ -1315,16 +1327,20 @@ define arm_aapcs_vfpcc <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i16: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vldr s4, .LCPI25_0 ; CHECK-MVE-NEXT: vldr s6, .LCPI25_1 -; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 -; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 -; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0 +; CHECK-MVE-NEXT: vldr s4, .LCPI25_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6 +; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6 +; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8 @@ -1339,9 +1355,9 @@ define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) { ; CHECK-MVE-NEXT: .p2align 2 ; CHECK-MVE-NEXT: @ %bb.1: ; CHECK-MVE-NEXT: .LCPI25_0: -; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 -; CHECK-MVE-NEXT: .LCPI25_1: ; CHECK-MVE-NEXT: .long 0x477fff00 @ float 65535 +; CHECK-MVE-NEXT: .LCPI25_1: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 ; ; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i16: ; CHECK-MVEFP: @ %bb.0: @@ -1356,16 +1372,20 @@ define arm_aapcs_vfpcc <4 x i16> @test_unsigned_v4f32_v4i16(<4 x float> %f) { define arm_aapcs_vfpcc <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-MVE-LABEL: test_unsigned_v4f32_v4i19: ; CHECK-MVE: @ %bb.0: -; CHECK-MVE-NEXT: vldr s4, .LCPI26_0 ; CHECK-MVE-NEXT: vldr s6, .LCPI26_1 -; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s4 -; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s4 -; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s6 -; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s6 -; CHECK-MVE-NEXT: vmaxnm.f32 s4, s1, s4 -; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s6 -; CHECK-MVE-NEXT: vminnm.f32 s4, s4, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s2 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s0 +; CHECK-MVE-NEXT: vldr s4, .LCPI26_0 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s3, s3 +; CHECK-MVE-NEXT: vmaxnm.f32 s2, s2, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s0, s0, s6 +; CHECK-MVE-NEXT: vmaxnm.f32 s10, s1, s1 +; CHECK-MVE-NEXT: vmaxnm.f32 s8, s8, s6 +; CHECK-MVE-NEXT: vminnm.f32 s2, s2, s4 +; CHECK-MVE-NEXT: vminnm.f32 s0, s0, s4 +; CHECK-MVE-NEXT: vmaxnm.f32 s6, s10, s6 +; CHECK-MVE-NEXT: vminnm.f32 s8, s8, s4 +; CHECK-MVE-NEXT: vminnm.f32 s4, s6, s4 ; CHECK-MVE-NEXT: vcvt.u32.f32 s2, s2 ; CHECK-MVE-NEXT: vcvt.u32.f32 s0, s0 ; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s8 @@ -1380,9 +1400,9 @@ define arm_aapcs_vfpcc <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-MVE-NEXT: .p2align 2 ; CHECK-MVE-NEXT: @ %bb.1: ; CHECK-MVE-NEXT: .LCPI26_0: -; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 -; CHECK-MVE-NEXT: .LCPI26_1: ; CHECK-MVE-NEXT: .long 0x48ffffe0 @ float 524287 +; CHECK-MVE-NEXT: .LCPI26_1: +; CHECK-MVE-NEXT: .long 0x00000000 @ float 0 ; ; CHECK-MVEFP-LABEL: test_unsigned_v4f32_v4i19: ; CHECK-MVEFP: @ %bb.0: diff --git a/llvm/test/CodeGen/WebAssembly/Half2Int16.ll b/llvm/test/CodeGen/WebAssembly/Half2Int16.ll new file mode 100644 index 0000000000000..7c08609f970ef --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/Half2Int16.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=wasm32 | FileCheck %s + +define i16 @fcvt_h_s_sat(float %a) { +; CHECK-LABEL: fcvt_h_s_sat: +; CHECK: .functype fcvt_h_s_sat (f32) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.const -0x1p15 +; CHECK-NEXT: f32.max +; CHECK-NEXT: f32.const 0x1.fffcp14 +; CHECK-NEXT: f32.min +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.ne +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_hu_s_sat(float %a) { +; CHECK-LABEL: fcvt_hu_s_sat: +; CHECK: .functype fcvt_hu_s_sat (f32) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.const 0x0p0 +; CHECK-NEXT: f32.max +; CHECK-NEXT: f32.const 0x1.fffep15 +; CHECK-NEXT: f32.min +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.ne +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_h_s_sat_nnan(float nofpclass(nan) %a) { +; CHECK-LABEL: fcvt_h_s_sat_nnan: +; CHECK: .functype fcvt_h_s_sat_nnan (f32) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.const -0x1p15 +; CHECK-NEXT: f32.max +; CHECK-NEXT: f32.const 0x1.fffcp14 +; CHECK-NEXT: f32.min +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: # fallthrough-return +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_hu_s_sat_nnan(float nofpclass(nan) %a) { +; CHECK-LABEL: fcvt_hu_s_sat_nnan: +; CHECK: .functype fcvt_hu_s_sat_nnan (f32) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.const 0x0p0 +; CHECK-NEXT: f32.max +; CHECK-NEXT: f32.const 0x1.fffep15 +; CHECK-NEXT: f32.min +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: # fallthrough-return +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_h_s_sat_nsnan(float nofpclass(snan) %a) { +; CHECK-LABEL: fcvt_h_s_sat_nsnan: +; CHECK: .functype fcvt_h_s_sat_nsnan (f32) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.const -0x1p15 +; CHECK-NEXT: f32.max +; CHECK-NEXT: f32.const 0x1.fffcp14 +; CHECK-NEXT: f32.min +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.ne +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return +start: + %0 = tail call i16 @llvm.fptosi.sat.i16.f32(float %a) + ret i16 %0 +} + +define i16 @fcvt_hu_s_sat_nsnan(float nofpclass(snan) %a) { +; CHECK-LABEL: fcvt_hu_s_sat_nsnan: +; CHECK: .functype fcvt_hu_s_sat_nsnan (f32) -> (i32) +; CHECK-NEXT: # %bb.0: # %start +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.const 0x0p0 +; CHECK-NEXT: f32.max +; CHECK-NEXT: f32.const 0x1.fffep15 +; CHECK-NEXT: f32.min +; CHECK-NEXT: i32.trunc_sat_f32_u +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: f32.ne +; CHECK-NEXT: i32.select +; CHECK-NEXT: # fallthrough-return +start: + %0 = tail call i16 @llvm.fptoui.sat.i16.f32(float %a) + ret i16 %0 +} +