diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 282dc4470238d..97421c7eb3868 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11516,9 +11516,18 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, case ISD::SETLE: case ISD::SETULT: case ISD::SETULE: { - // Since it's known never nan to get here already, either fminnum or - // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is - // expanded in terms of it. + // Since it's known never nan to get here already, any of fminimumnum, + // fminimum, fminnum_ieee, or fminnum is OK. Try the IEEE-754-2019 forms + // first, then fminnum_ieee, since fminnum is expanded in terms of it. + unsigned IEEE2019NumOpcode = + (LHS == True) ? ISD::FMINIMUMNUM : ISD::FMAXIMUMNUM; + if (TLI.isOperationLegal(IEEE2019NumOpcode, VT)) + return DAG.getNode(IEEE2019NumOpcode, DL, VT, LHS, RHS); + + unsigned IEEE2019Opcode = (LHS == True) ? ISD::FMINIMUM : ISD::FMAXIMUM; + if (TLI.isOperationLegal(IEEE2019Opcode, VT)) + return DAG.getNode(IEEE2019Opcode, DL, VT, LHS, RHS); + unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); @@ -11534,6 +11543,15 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, case ISD::SETGE: case ISD::SETUGT: case ISD::SETUGE: { + unsigned IEEE2019NumOpcode = + (LHS == True) ? ISD::FMAXIMUMNUM : ISD::FMINIMUMNUM; + if (TLI.isOperationLegal(IEEE2019NumOpcode, VT)) + return DAG.getNode(IEEE2019NumOpcode, DL, VT, LHS, RHS); + + unsigned IEEE2019Opcode = (LHS == True) ? ISD::FMAXIMUM : ISD::FMINIMUM; + if (TLI.isOperationLegal(IEEE2019Opcode, VT)) + return DAG.getNode(IEEE2019Opcode, DL, VT, LHS, RHS); + unsigned IEEEOpcode = (LHS == True) ? 
ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE; if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT)) return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS); diff --git a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll index f7bd5f8d5bfb4..1fb3eb5c215e7 100644 --- a/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll @@ -119,7 +119,7 @@ define float @v_test_fmin_legacy_ule_f32_nnan_nsz_flag(float %a, float %b) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: v_minimum_f32 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ule float %a, %b %val = select nnan nsz i1 %cmp, float %a, float %b @@ -236,7 +236,7 @@ define float @v_test_fmax_legacy_uge_f32_nnan_nsz_flag(float %a, float %b) { ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: v_maximum_f32 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp uge float %a, %b %val = select nnan nsz i1 %cmp, float %a, float %b @@ -693,7 +693,7 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) { ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v0.l, v0.l, v1.l +; GFX12-TRUE16-NEXT: v_minimum_f16 v0.l, v0.l, v1.l ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-FAKE16-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag: @@ -703,7 +703,7 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) { ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-NEXT: v_minimum_f16 v0, v0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] 
%cmp = fcmp ule half %a, %b %val = select nnan nsz i1 %cmp, half %a, half %b @@ -872,7 +872,7 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) { ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v0.l, v0.l, v1.l +; GFX12-TRUE16-NEXT: v_maximum_f16 v0.l, v0.l, v1.l ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-FAKE16-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag: @@ -882,7 +882,7 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) { ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v0, v0, v1 +; GFX12-FAKE16-NEXT: v_maximum_f16 v0, v0, v1 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp uge half %a, %b %val = select nnan nsz i1 %cmp, half %a, half %b @@ -1122,7 +1122,7 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v1 +; GFX12-NEXT: v_pk_minimum_f16 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp ule <2 x half> %a, %b %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b @@ -1362,7 +1362,7 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v1 +; GFX12-NEXT: v_pk_maximum_f16 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] %cmp = fcmp uge <2 x half> %a, %b %val = select nnan nsz <2 x i1> %cmp, <2 x half> %a, <2 x half> %b @@ -1692,8 +1692,12 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 ; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX9-NEXT: v_pk_min_f16 v0, v0, v2 -; GFX9-NEXT: v_pk_min_f16 v1, v1, v3 +; GFX9-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_min_f16 v1, v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag: @@ -1703,6 +1707,11 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-NEXT: v_pk_min_num_f16 v0, v0, v2 ; GFX12-NEXT: v_pk_min_num_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -2034,8 +2043,12 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 ; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_max_f16 v2, v2, v2 +; GFX9-NEXT: v_pk_max_f16 v0, v0, v0 ; GFX9-NEXT: v_pk_max_f16 v0, v0, v2 -; GFX9-NEXT: v_pk_max_f16 v1, v1, v3 +; GFX9-NEXT: v_pk_max_f16 v2, v3, v3 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v1 +; GFX9-NEXT: v_pk_max_f16 v1, v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag: @@ -2045,6 +2058,11 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4 ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_max_num_f16 v2, v2, v2 +; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v0 +; GFX12-NEXT: v_pk_max_num_f16 v3, v3, v3 +; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v1 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | 
instid1(VALU_DEP_2) ; GFX12-NEXT: v_pk_max_num_f16 v0, v0, v2 ; GFX12-NEXT: v_pk_max_num_f16 v1, v1, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -2079,7 +2097,7 @@ define float @v_test_fmin_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_min_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: v_minimum_f32 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] %a = fadd nnan float %arg0, %arg0 %b = fadd nnan float %arg1, %arg1 @@ -2114,7 +2132,7 @@ define float @v_test_fmax_legacy_uge_f32_nsz_flag__nnan_srcs(float %arg0, float ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_dual_add_f32 v0, v0, v0 :: v_dual_add_f32 v1, v1, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_max_num_f32_e32 v0, v0, v1 +; GFX12-NEXT: v_maximum_f32 v0, v0, v1 ; GFX12-NEXT: s_setpc_b64 s[30:31] %a = fadd nnan float %arg0, %arg0 %b = fadd nnan float %arg1, %arg1 diff --git a/llvm/test/CodeGen/WebAssembly/f32.ll b/llvm/test/CodeGen/WebAssembly/f32.ll index 7410fa43e4081..a7c35317f1da8 100644 --- a/llvm/test/CodeGen/WebAssembly/f32.ll +++ b/llvm/test/CodeGen/WebAssembly/f32.ll @@ -229,13 +229,10 @@ define float @fminnum32_intrinsic(float %x, float %y) { ; CHECK-LABEL: fminnum32_intrinsic: ; CHECK: .functype fminnum32_intrinsic (f32, f32) -> (f32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 -; CHECK-NEXT: local.get $push4=, 1 -; CHECK-NEXT: local.get $push3=, 0 -; CHECK-NEXT: local.get $push2=, 1 -; CHECK-NEXT: f32.lt $push0=, $pop3, $pop2 -; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.min $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call nnan float @llvm.minnum.f32(float %x, float %y) ret float %a } @@ -282,13 +279,10 @@ define float @fmaxnum32_intrinsic(float %x, float %y) { ; 
CHECK-LABEL: fmaxnum32_intrinsic: ; CHECK: .functype fmaxnum32_intrinsic (f32, f32) -> (f32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 -; CHECK-NEXT: local.get $push4=, 1 -; CHECK-NEXT: local.get $push3=, 0 -; CHECK-NEXT: local.get $push2=, 1 -; CHECK-NEXT: f32.gt $push0=, $pop3, $pop2 -; CHECK-NEXT: f32.select $push1=, $pop5, $pop4, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f32.max $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call nnan float @llvm.maxnum.f32(float %x, float %y) ret float %a } @@ -309,13 +303,10 @@ define float @fmaxnum32_zero_intrinsic(float %x) { ; CHECK-LABEL: fmaxnum32_zero_intrinsic: ; CHECK: .functype fmaxnum32_zero_intrinsic (f32) -> (f32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f32.const $push0=, 0x0p0 -; CHECK-NEXT: local.get $push4=, 0 -; CHECK-NEXT: f32.const $push3=, 0x0p0 -; CHECK-NEXT: f32.gt $push1=, $pop4, $pop3 -; CHECK-NEXT: f32.select $push2=, $pop5, $pop0, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: f32.max $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = call nnan float @llvm.maxnum.f32(float %x, float 0.0) ret float %a } diff --git a/llvm/test/CodeGen/WebAssembly/f64.ll b/llvm/test/CodeGen/WebAssembly/f64.ll index d79f34185eb87..c5af777888d36 100644 --- a/llvm/test/CodeGen/WebAssembly/f64.ll +++ b/llvm/test/CodeGen/WebAssembly/f64.ll @@ -229,13 +229,10 @@ define double @fminnum64_intrinsic(double %x, double %y) { ; CHECK-LABEL: fminnum64_intrinsic: ; CHECK: .functype fminnum64_intrinsic (f64, f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 -; CHECK-NEXT: local.get $push4=, 1 -; CHECK-NEXT: local.get $push3=, 0 -; CHECK-NEXT: local.get $push2=, 1 -; CHECK-NEXT: f64.lt $push0=, $pop3, $pop2 -; CHECK-NEXT: f64.select $push1=, $pop5, $pop4, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: 
local.get $push1=, 1 +; CHECK-NEXT: f64.min $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call nnan double @llvm.minnum.f64(double %x, double %y) ret double %a } @@ -256,13 +253,10 @@ define double @fminnum64_zero_intrinsic(double %x) { ; CHECK-LABEL: fminnum64_zero_intrinsic: ; CHECK: .functype fminnum64_zero_intrinsic (f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f64.const $push0=, -0x0p0 -; CHECK-NEXT: local.get $push4=, 0 -; CHECK-NEXT: f64.const $push3=, -0x0p0 -; CHECK-NEXT: f64.lt $push1=, $pop4, $pop3 -; CHECK-NEXT: f64.select $push2=, $pop5, $pop0, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: f64.min $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = call nnan double @llvm.minnum.f64(double %x, double -0.0) ret double %a } @@ -297,13 +291,10 @@ define double@fmaxnum64_intrinsic(double %x, double %y) { ; CHECK-LABEL: fmaxnum64_intrinsic: ; CHECK: .functype fmaxnum64_intrinsic (f64, f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 -; CHECK-NEXT: local.get $push4=, 1 -; CHECK-NEXT: local.get $push3=, 0 -; CHECK-NEXT: local.get $push2=, 1 -; CHECK-NEXT: f64.gt $push0=, $pop3, $pop2 -; CHECK-NEXT: f64.select $push1=, $pop5, $pop4, $pop0 -; CHECK-NEXT: return $pop1 +; CHECK-NEXT: local.get $push2=, 0 +; CHECK-NEXT: local.get $push1=, 1 +; CHECK-NEXT: f64.max $push0=, $pop2, $pop1 +; CHECK-NEXT: return $pop0 %a = call nnan double @llvm.maxnum.f64(double %x, double %y) ret double %a } @@ -324,13 +315,10 @@ define double @fmaxnum64_zero_intrinsic(double %x) { ; CHECK-LABEL: fmaxnum64_zero_intrinsic: ; CHECK: .functype fmaxnum64_zero_intrinsic (f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push5=, 0 +; CHECK-NEXT: local.get $push2=, 0 ; CHECK-NEXT: f64.const $push0=, 0x0p0 -; CHECK-NEXT: local.get $push4=, 0 -; CHECK-NEXT: f64.const $push3=, 0x0p0 -; CHECK-NEXT: f64.gt $push1=, $pop4, $pop3 -; CHECK-NEXT: f64.select $push2=, $pop5, 
$pop0, $pop1 -; CHECK-NEXT: return $pop2 +; CHECK-NEXT: f64.max $push1=, $pop2, $pop0 +; CHECK-NEXT: return $pop1 %a = call nnan double @llvm.maxnum.f64(double %x, double 0.0) ret double %a } diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index 185c46aa5681e..875fe6924fbd2 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -11722,101 +11722,69 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; SIMD128-LABEL: minnum_intrinsic_v4f32: ; SIMD128: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-NEXT: local.tee $push26=, $3=, $pop27 -; SIMD128-NEXT: f32x4.extract_lane $push25=, $1, 0 -; SIMD128-NEXT: local.tee $push24=, $2=, $pop25 -; SIMD128-NEXT: f32.lt $push2=, $3, $2 -; SIMD128-NEXT: f32.select $push3=, $pop26, $pop24, $pop2 -; SIMD128-NEXT: f32x4.splat $push4=, $pop3 -; SIMD128-NEXT: f32x4.extract_lane $push23=, $0, 1 -; SIMD128-NEXT: local.tee $push22=, $3=, $pop23 -; SIMD128-NEXT: f32x4.extract_lane $push21=, $1, 1 -; SIMD128-NEXT: local.tee $push20=, $2=, $pop21 -; SIMD128-NEXT: f32.lt $push0=, $3, $2 -; SIMD128-NEXT: f32.select $push1=, $pop22, $pop20, $pop0 -; SIMD128-NEXT: f32x4.replace_lane $push5=, $pop4, 1, $pop1 -; SIMD128-NEXT: f32x4.extract_lane $push19=, $0, 2 -; SIMD128-NEXT: local.tee $push18=, $3=, $pop19 -; SIMD128-NEXT: f32x4.extract_lane $push17=, $1, 2 -; SIMD128-NEXT: local.tee $push16=, $2=, $pop17 -; SIMD128-NEXT: f32.lt $push6=, $3, $2 -; SIMD128-NEXT: f32.select $push7=, $pop18, $pop16, $pop6 -; SIMD128-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7 -; SIMD128-NEXT: f32x4.extract_lane $push15=, $0, 3 -; SIMD128-NEXT: local.tee $push14=, $3=, $pop15 -; SIMD128-NEXT: f32x4.extract_lane $push13=, $1, 3 -; SIMD128-NEXT: local.tee $push12=, $2=, $pop13 -; SIMD128-NEXT: f32.lt $push9=, $3, $2 -; 
SIMD128-NEXT: f32.select $push10=, $pop14, $pop12, $pop9 -; SIMD128-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10 -; SIMD128-NEXT: return $pop11 +; SIMD128-NEXT: f32x4.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: f32x4.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: f32.min $push5=, $pop4, $pop3 +; SIMD128-NEXT: f32x4.splat $push6=, $pop5 +; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: f32x4.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: f32.min $push2=, $pop1, $pop0 +; SIMD128-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: f32x4.extract_lane $push9=, $0, 2 +; SIMD128-NEXT: f32x4.extract_lane $push8=, $1, 2 +; SIMD128-NEXT: f32.min $push10=, $pop9, $pop8 +; SIMD128-NEXT: f32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-NEXT: f32x4.extract_lane $push13=, $0, 3 +; SIMD128-NEXT: f32x4.extract_lane $push12=, $1, 3 +; SIMD128-NEXT: f32.min $push14=, $pop13, $pop12 +; SIMD128-NEXT: f32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-NEXT: return $pop15 ; ; SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: ; SIMD128-FAST: .functype minnum_intrinsic_v4f32 (v128, v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-FAST-NEXT: local.tee $push26=, $3=, $pop27 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push25=, $1, 0 -; SIMD128-FAST-NEXT: local.tee $push24=, $2=, $pop25 -; SIMD128-FAST-NEXT: f32.lt $push3=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push4=, $pop26, $pop24, $pop3 -; SIMD128-FAST-NEXT: f32x4.splat $push5=, $pop4 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push23=, $0, 1 -; SIMD128-FAST-NEXT: local.tee $push22=, $3=, $pop23 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $1, 1 -; SIMD128-FAST-NEXT: local.tee $push20=, $2=, $pop21 -; SIMD128-FAST-NEXT: f32.lt $push1=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push2=, $pop22, $pop20, $pop1 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push6=, $pop5, 1, $pop2 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push19=, $0, 2 
-; SIMD128-FAST-NEXT: local.tee $push18=, $3=, $pop19 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $1, 2 -; SIMD128-FAST-NEXT: local.tee $push16=, $2=, $pop17 -; SIMD128-FAST-NEXT: f32.lt $push7=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push8=, $pop18, $pop16, $pop7 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push9=, $pop6, 2, $pop8 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push15=, $0, 3 -; SIMD128-FAST-NEXT: local.tee $push14=, $3=, $pop15 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: f32.min $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: f32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: f32.min $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: f32.min $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push14=, $0, 3 ; SIMD128-FAST-NEXT: f32x4.extract_lane $push13=, $1, 3 -; SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13 -; SIMD128-FAST-NEXT: f32.lt $push10=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push11=, $pop14, $pop12, $pop10 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop9, 3, $pop11 +; SIMD128-FAST-NEXT: f32.min $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop12, 3, $pop15 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: minnum_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8 -; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0 -; NO-SIMD128-NEXT: f32.store 12($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7 -; 
NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2 -; NO-SIMD128-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6 -; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4 -; NO-SIMD128-NEXT: f32.store 4($0), $pop5 -; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5 -; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6 -; NO-SIMD128-NEXT: f32.store 0($0), $pop7 +; NO-SIMD128-NEXT: f32.min $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.min $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.min $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.min $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: ; NO-SIMD128-FAST: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: -; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $1, $5 -; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0 -; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $2, $6 -; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7 -; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: f32.min $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.min $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x 
float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -11920,66 +11888,42 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; SIMD128-LABEL: fminnumv432_one_zero_intrinsic: ; SIMD128: .functype fminnumv432_one_zero_intrinsic (v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-NEXT: local.tee $push26=, $1=, $pop27 -; SIMD128-NEXT: f32.const $push3=, -0x1p0 -; SIMD128-NEXT: f32.const $push25=, -0x1p0 -; SIMD128-NEXT: f32.lt $push4=, $1, $pop25 -; SIMD128-NEXT: f32.select $push5=, $pop26, $pop3, $pop4 +; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0 +; SIMD128-NEXT: f32.const $push4=, -0x1p0 +; SIMD128-NEXT: f32.min $push5=, $pop3, $pop4 ; SIMD128-NEXT: f32x4.splat $push6=, $pop5 -; SIMD128-NEXT: f32x4.extract_lane $push24=, $0, 1 -; SIMD128-NEXT: local.tee $push23=, $1=, $pop24 -; SIMD128-NEXT: f32.const $push0=, 0x0p0 -; SIMD128-NEXT: f32.const $push22=, 0x0p0 -; SIMD128-NEXT: f32.lt $push1=, $1, $pop22 -; SIMD128-NEXT: f32.select $push2=, $pop23, $pop0, $pop1 +; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 +; SIMD128-NEXT: f32.const $push1=, 0x0p0 +; SIMD128-NEXT: f32.min $push2=, $pop0, $pop1 ; SIMD128-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2 -; SIMD128-NEXT: f32x4.extract_lane $push21=, $0, 2 -; SIMD128-NEXT: local.tee $push20=, $1=, $pop21 -; SIMD128-NEXT: f32.const $push19=, -0x1p0 -; SIMD128-NEXT: f32.const $push18=, -0x1p0 -; SIMD128-NEXT: f32.lt $push8=, $1, $pop18 -; SIMD128-NEXT: f32.select $push9=, $pop20, $pop19, $pop8 -; SIMD128-NEXT: f32x4.replace_lane $push10=, $pop7, 2, $pop9 -; SIMD128-NEXT: f32x4.extract_lane $push17=, $0, 3 -; SIMD128-NEXT: local.tee $push16=, $1=, $pop17 +; SIMD128-NEXT: f32x4.extract_lane $push8=, $0, 2 ; SIMD128-NEXT: f32.const $push15=, -0x1p0 +; SIMD128-NEXT: f32.min $push9=, $pop8, $pop15 +; SIMD128-NEXT: f32x4.replace_lane $push10=, $pop7, 2, $pop9 +; SIMD128-NEXT: f32x4.extract_lane $push11=, $0, 3 ; 
SIMD128-NEXT: f32.const $push14=, -0x1p0 -; SIMD128-NEXT: f32.lt $push11=, $1, $pop14 -; SIMD128-NEXT: f32.select $push12=, $pop16, $pop15, $pop11 +; SIMD128-NEXT: f32.min $push12=, $pop11, $pop14 ; SIMD128-NEXT: f32x4.replace_lane $push13=, $pop10, 3, $pop12 ; SIMD128-NEXT: return $pop13 ; ; SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic: ; SIMD128-FAST: .functype fminnumv432_one_zero_intrinsic (v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-FAST-NEXT: local.tee $push26=, $1=, $pop27 -; SIMD128-FAST-NEXT: f32.const $push4=, -0x1p0 -; SIMD128-FAST-NEXT: f32.const $push25=, -0x1p0 -; SIMD128-FAST-NEXT: f32.lt $push5=, $1, $pop25 -; SIMD128-FAST-NEXT: f32.select $push6=, $pop26, $pop4, $pop5 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push4=, $0, 0 +; SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0 +; SIMD128-FAST-NEXT: f32.min $push6=, $pop4, $pop5 ; SIMD128-FAST-NEXT: f32x4.splat $push7=, $pop6 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push24=, $0, 1 -; SIMD128-FAST-NEXT: local.tee $push23=, $1=, $pop24 -; SIMD128-FAST-NEXT: f32.const $push1=, 0x0p0 -; SIMD128-FAST-NEXT: f32.const $push22=, 0x0p0 -; SIMD128-FAST-NEXT: f32.lt $push2=, $1, $pop22 -; SIMD128-FAST-NEXT: f32.select $push3=, $pop23, $pop1, $pop2 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push1=, $0, 1 +; SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 +; SIMD128-FAST-NEXT: f32.min $push3=, $pop1, $pop2 ; SIMD128-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $0, 2 -; SIMD128-FAST-NEXT: local.tee $push20=, $1=, $pop21 -; SIMD128-FAST-NEXT: f32.const $push19=, -0x1p0 -; SIMD128-FAST-NEXT: f32.const $push18=, -0x1p0 -; SIMD128-FAST-NEXT: f32.lt $push9=, $1, $pop18 -; SIMD128-FAST-NEXT: f32.select $push10=, $pop20, $pop19, $pop9 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push11=, $pop8, 2, $pop10 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $0, 3 -; SIMD128-FAST-NEXT: local.tee 
$push16=, $1=, $pop17 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push9=, $0, 2 ; SIMD128-FAST-NEXT: f32.const $push15=, -0x1p0 +; SIMD128-FAST-NEXT: f32.min $push10=, $pop9, $pop15 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push11=, $pop8, 2, $pop10 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push12=, $0, 3 ; SIMD128-FAST-NEXT: f32.const $push14=, -0x1p0 -; SIMD128-FAST-NEXT: f32.lt $push12=, $1, $pop14 -; SIMD128-FAST-NEXT: f32.select $push13=, $pop16, $pop15, $pop12 +; SIMD128-FAST-NEXT: f32.min $push13=, $pop12, $pop14 ; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop11, 3, $pop13 ; SIMD128-FAST-NEXT: return $pop0 ; @@ -11989,17 +11933,15 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-NEXT: f32.min $push1=, $4, $pop0 ; NO-SIMD128-NEXT: f32.store 12($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: f32.min $push2=, $3, $pop9 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: f32.min $push2=, $3, $pop7 ; NO-SIMD128-NEXT: f32.store 8($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 -; NO-SIMD128-NEXT: f32.const $push8=, 0x0p0 -; NO-SIMD128-NEXT: f32.lt $push4=, $2, $pop8 -; NO-SIMD128-NEXT: f32.select $push5=, $2, $pop3, $pop4 -; NO-SIMD128-NEXT: f32.store 4($0), $pop5 -; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-NEXT: f32.min $push6=, $1, $pop7 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-NEXT: f32.min $push4=, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: f32.min $push5=, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic: @@ -12008,17 +11950,15 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-FAST-NEXT: f32.min $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: f32.store 
0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $pop9 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push3=, 0x0p0 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x0p0 -; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $2, $pop8 -; NO-SIMD128-FAST-NEXT: f32.select $push5=, $2, $pop3, $pop4 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 ; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-FAST-NEXT: f32.min $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.min $push4=, $3, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float>) ret <4 x float> %a @@ -12072,101 +12012,69 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; SIMD128-LABEL: maxnum_intrinsic_v4f32: ; SIMD128: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-NEXT: local.tee $push26=, $3=, $pop27 -; SIMD128-NEXT: f32x4.extract_lane $push25=, $1, 0 -; SIMD128-NEXT: local.tee $push24=, $2=, $pop25 -; SIMD128-NEXT: f32.gt $push2=, $3, $2 -; SIMD128-NEXT: f32.select $push3=, $pop26, $pop24, $pop2 -; SIMD128-NEXT: f32x4.splat $push4=, $pop3 -; SIMD128-NEXT: f32x4.extract_lane $push23=, $0, 1 -; SIMD128-NEXT: local.tee $push22=, $3=, $pop23 -; SIMD128-NEXT: f32x4.extract_lane $push21=, $1, 1 -; SIMD128-NEXT: local.tee $push20=, $2=, $pop21 -; SIMD128-NEXT: f32.gt $push0=, $3, $2 -; SIMD128-NEXT: f32.select $push1=, $pop22, $pop20, $pop0 -; 
SIMD128-NEXT: f32x4.replace_lane $push5=, $pop4, 1, $pop1 -; SIMD128-NEXT: f32x4.extract_lane $push19=, $0, 2 -; SIMD128-NEXT: local.tee $push18=, $3=, $pop19 -; SIMD128-NEXT: f32x4.extract_lane $push17=, $1, 2 -; SIMD128-NEXT: local.tee $push16=, $2=, $pop17 -; SIMD128-NEXT: f32.gt $push6=, $3, $2 -; SIMD128-NEXT: f32.select $push7=, $pop18, $pop16, $pop6 -; SIMD128-NEXT: f32x4.replace_lane $push8=, $pop5, 2, $pop7 -; SIMD128-NEXT: f32x4.extract_lane $push15=, $0, 3 -; SIMD128-NEXT: local.tee $push14=, $3=, $pop15 -; SIMD128-NEXT: f32x4.extract_lane $push13=, $1, 3 -; SIMD128-NEXT: local.tee $push12=, $2=, $pop13 -; SIMD128-NEXT: f32.gt $push9=, $3, $2 -; SIMD128-NEXT: f32.select $push10=, $pop14, $pop12, $pop9 -; SIMD128-NEXT: f32x4.replace_lane $push11=, $pop8, 3, $pop10 -; SIMD128-NEXT: return $pop11 +; SIMD128-NEXT: f32x4.extract_lane $push4=, $0, 0 +; SIMD128-NEXT: f32x4.extract_lane $push3=, $1, 0 +; SIMD128-NEXT: f32.max $push5=, $pop4, $pop3 +; SIMD128-NEXT: f32x4.splat $push6=, $pop5 +; SIMD128-NEXT: f32x4.extract_lane $push1=, $0, 1 +; SIMD128-NEXT: f32x4.extract_lane $push0=, $1, 1 +; SIMD128-NEXT: f32.max $push2=, $pop1, $pop0 +; SIMD128-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2 +; SIMD128-NEXT: f32x4.extract_lane $push9=, $0, 2 +; SIMD128-NEXT: f32x4.extract_lane $push8=, $1, 2 +; SIMD128-NEXT: f32.max $push10=, $pop9, $pop8 +; SIMD128-NEXT: f32x4.replace_lane $push11=, $pop7, 2, $pop10 +; SIMD128-NEXT: f32x4.extract_lane $push13=, $0, 3 +; SIMD128-NEXT: f32x4.extract_lane $push12=, $1, 3 +; SIMD128-NEXT: f32.max $push14=, $pop13, $pop12 +; SIMD128-NEXT: f32x4.replace_lane $push15=, $pop11, 3, $pop14 +; SIMD128-NEXT: return $pop15 ; ; SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: ; SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (v128, v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-FAST-NEXT: local.tee $push26=, $3=, $pop27 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push25=, $1, 0 
-; SIMD128-FAST-NEXT: local.tee $push24=, $2=, $pop25 -; SIMD128-FAST-NEXT: f32.gt $push3=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push4=, $pop26, $pop24, $pop3 -; SIMD128-FAST-NEXT: f32x4.splat $push5=, $pop4 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push23=, $0, 1 -; SIMD128-FAST-NEXT: local.tee $push22=, $3=, $pop23 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $1, 1 -; SIMD128-FAST-NEXT: local.tee $push20=, $2=, $pop21 -; SIMD128-FAST-NEXT: f32.gt $push1=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push2=, $pop22, $pop20, $pop1 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push6=, $pop5, 1, $pop2 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push19=, $0, 2 -; SIMD128-FAST-NEXT: local.tee $push18=, $3=, $pop19 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $1, 2 -; SIMD128-FAST-NEXT: local.tee $push16=, $2=, $pop17 -; SIMD128-FAST-NEXT: f32.gt $push7=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push8=, $pop18, $pop16, $pop7 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push9=, $pop6, 2, $pop8 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push15=, $0, 3 -; SIMD128-FAST-NEXT: local.tee $push14=, $3=, $pop15 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push5=, $0, 0 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push4=, $1, 0 +; SIMD128-FAST-NEXT: f32.max $push6=, $pop5, $pop4 +; SIMD128-FAST-NEXT: f32x4.splat $push7=, $pop6 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push2=, $0, 1 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push1=, $1, 1 +; SIMD128-FAST-NEXT: f32.max $push3=, $pop2, $pop1 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push10=, $0, 2 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push9=, $1, 2 +; SIMD128-FAST-NEXT: f32.max $push11=, $pop10, $pop9 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push12=, $pop8, 2, $pop11 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push14=, $0, 3 ; SIMD128-FAST-NEXT: f32x4.extract_lane $push13=, $1, 3 -; SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13 -; SIMD128-FAST-NEXT: f32.gt 
$push10=, $3, $2 -; SIMD128-FAST-NEXT: f32.select $push11=, $pop14, $pop12, $pop10 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop9, 3, $pop11 +; SIMD128-FAST-NEXT: f32.max $push15=, $pop14, $pop13 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop12, 3, $pop15 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32: ; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8 -; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0 -; NO-SIMD128-NEXT: f32.store 12($0), $pop1 -; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7 -; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2 -; NO-SIMD128-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6 -; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4 -; NO-SIMD128-NEXT: f32.store 4($0), $pop5 -; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5 -; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6 -; NO-SIMD128-NEXT: f32.store 0($0), $pop7 +; NO-SIMD128-NEXT: f32.max $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.max $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.max $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.max $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: ; NO-SIMD128-FAST: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: -; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5 -; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0 -; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6 -; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7 -; NO-SIMD128-FAST-NEXT: f32.select 
$push5=, $3, $7, $pop4 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: f32.max $push0=, $1, $5 +; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop0 +; NO-SIMD128-FAST-NEXT: f32.max $push1=, $2, $6 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 +; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -12218,66 +12126,42 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; SIMD128-LABEL: maxnum_one_zero_intrinsic_v4f32: ; SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (v128, v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-NEXT: local.tee $push26=, $2=, $pop27 -; SIMD128-NEXT: f32.const $push3=, -0x1p0 -; SIMD128-NEXT: f32.const $push25=, -0x1p0 -; SIMD128-NEXT: f32.gt $push4=, $2, $pop25 -; SIMD128-NEXT: f32.select $push5=, $pop26, $pop3, $pop4 +; SIMD128-NEXT: f32x4.extract_lane $push3=, $0, 0 +; SIMD128-NEXT: f32.const $push4=, -0x1p0 +; SIMD128-NEXT: f32.max $push5=, $pop3, $pop4 ; SIMD128-NEXT: f32x4.splat $push6=, $pop5 -; SIMD128-NEXT: f32x4.extract_lane $push24=, $0, 1 -; SIMD128-NEXT: local.tee $push23=, $2=, $pop24 -; SIMD128-NEXT: f32.const $push0=, 0x0p0 -; SIMD128-NEXT: f32.const $push22=, 0x0p0 -; SIMD128-NEXT: f32.gt $push1=, $2, $pop22 -; SIMD128-NEXT: f32.select $push2=, $pop23, $pop0, $pop1 +; SIMD128-NEXT: f32x4.extract_lane $push0=, $0, 1 +; SIMD128-NEXT: f32.const $push1=, 0x0p0 +; SIMD128-NEXT: f32.max $push2=, $pop0, $pop1 ; SIMD128-NEXT: f32x4.replace_lane $push7=, $pop6, 1, $pop2 -; SIMD128-NEXT: f32x4.extract_lane 
$push21=, $0, 2 -; SIMD128-NEXT: local.tee $push20=, $2=, $pop21 -; SIMD128-NEXT: f32.const $push19=, -0x1p0 -; SIMD128-NEXT: f32.const $push18=, -0x1p0 -; SIMD128-NEXT: f32.gt $push8=, $2, $pop18 -; SIMD128-NEXT: f32.select $push9=, $pop20, $pop19, $pop8 -; SIMD128-NEXT: f32x4.replace_lane $push10=, $pop7, 2, $pop9 -; SIMD128-NEXT: f32x4.extract_lane $push17=, $0, 3 -; SIMD128-NEXT: local.tee $push16=, $2=, $pop17 +; SIMD128-NEXT: f32x4.extract_lane $push8=, $0, 2 ; SIMD128-NEXT: f32.const $push15=, -0x1p0 +; SIMD128-NEXT: f32.max $push9=, $pop8, $pop15 +; SIMD128-NEXT: f32x4.replace_lane $push10=, $pop7, 2, $pop9 +; SIMD128-NEXT: f32x4.extract_lane $push11=, $0, 3 ; SIMD128-NEXT: f32.const $push14=, -0x1p0 -; SIMD128-NEXT: f32.gt $push11=, $2, $pop14 -; SIMD128-NEXT: f32.select $push12=, $pop16, $pop15, $pop11 +; SIMD128-NEXT: f32.max $push12=, $pop11, $pop14 ; SIMD128-NEXT: f32x4.replace_lane $push13=, $pop10, 3, $pop12 ; SIMD128-NEXT: return $pop13 ; ; SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32: ; SIMD128-FAST: .functype maxnum_one_zero_intrinsic_v4f32 (v128, v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: f32x4.extract_lane $push27=, $0, 0 -; SIMD128-FAST-NEXT: local.tee $push26=, $2=, $pop27 -; SIMD128-FAST-NEXT: f32.const $push4=, -0x1p0 -; SIMD128-FAST-NEXT: f32.const $push25=, -0x1p0 -; SIMD128-FAST-NEXT: f32.gt $push5=, $2, $pop25 -; SIMD128-FAST-NEXT: f32.select $push6=, $pop26, $pop4, $pop5 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push4=, $0, 0 +; SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0 +; SIMD128-FAST-NEXT: f32.max $push6=, $pop4, $pop5 ; SIMD128-FAST-NEXT: f32x4.splat $push7=, $pop6 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push24=, $0, 1 -; SIMD128-FAST-NEXT: local.tee $push23=, $2=, $pop24 -; SIMD128-FAST-NEXT: f32.const $push1=, 0x0p0 -; SIMD128-FAST-NEXT: f32.const $push22=, 0x0p0 -; SIMD128-FAST-NEXT: f32.gt $push2=, $2, $pop22 -; SIMD128-FAST-NEXT: f32.select $push3=, $pop23, $pop1, $pop2 +; SIMD128-FAST-NEXT: 
f32x4.extract_lane $push1=, $0, 1 +; SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 +; SIMD128-FAST-NEXT: f32.max $push3=, $pop1, $pop2 ; SIMD128-FAST-NEXT: f32x4.replace_lane $push8=, $pop7, 1, $pop3 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push21=, $0, 2 -; SIMD128-FAST-NEXT: local.tee $push20=, $2=, $pop21 -; SIMD128-FAST-NEXT: f32.const $push19=, -0x1p0 -; SIMD128-FAST-NEXT: f32.const $push18=, -0x1p0 -; SIMD128-FAST-NEXT: f32.gt $push9=, $2, $pop18 -; SIMD128-FAST-NEXT: f32.select $push10=, $pop20, $pop19, $pop9 -; SIMD128-FAST-NEXT: f32x4.replace_lane $push11=, $pop8, 2, $pop10 -; SIMD128-FAST-NEXT: f32x4.extract_lane $push17=, $0, 3 -; SIMD128-FAST-NEXT: local.tee $push16=, $2=, $pop17 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push9=, $0, 2 ; SIMD128-FAST-NEXT: f32.const $push15=, -0x1p0 +; SIMD128-FAST-NEXT: f32.max $push10=, $pop9, $pop15 +; SIMD128-FAST-NEXT: f32x4.replace_lane $push11=, $pop8, 2, $pop10 +; SIMD128-FAST-NEXT: f32x4.extract_lane $push12=, $0, 3 ; SIMD128-FAST-NEXT: f32.const $push14=, -0x1p0 -; SIMD128-FAST-NEXT: f32.gt $push12=, $2, $pop14 -; SIMD128-FAST-NEXT: f32.select $push13=, $pop16, $pop15, $pop12 +; SIMD128-FAST-NEXT: f32.max $push13=, $pop12, $pop14 ; SIMD128-FAST-NEXT: f32x4.replace_lane $push0=, $pop11, 3, $pop13 ; SIMD128-FAST-NEXT: return $pop0 ; @@ -12287,17 +12171,15 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-NEXT: f32.max $push1=, $4, $pop0 ; NO-SIMD128-NEXT: f32.store 12($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: f32.max $push2=, $3, $pop9 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: f32.max $push2=, $3, $pop7 ; NO-SIMD128-NEXT: f32.store 8($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 -; NO-SIMD128-NEXT: f32.const $push8=, 0x0p0 -; NO-SIMD128-NEXT: f32.gt $push4=, $2, $pop8 -; NO-SIMD128-NEXT: f32.select $push5=, $2, $pop3, $pop4 -; NO-SIMD128-NEXT: f32.store 
4($0), $pop5 -; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-NEXT: f32.max $push6=, $1, $pop7 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 +; NO-SIMD128-NEXT: f32.max $push4=, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: f32.max $push5=, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32: @@ -12306,17 +12188,15 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-FAST-NEXT: f32.max $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $pop9 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push3=, 0x0p0 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, 0x0p0 -; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $2, $pop8 -; NO-SIMD128-FAST-NEXT: f32.select $push5=, $2, $pop3, $pop4 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $2, $pop2 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 ; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-FAST-NEXT: f32.max $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.max $push4=, $3, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float>) ret <4 x float> %a diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll index 1d194b640eab2..fa3711d6d2ddf 100644 --- 
a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll +++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll @@ -792,7 +792,7 @@ define double @pairwise_max_v2f64_fast(<2 x double> %arg) { ; SIMD128: .functype pairwise_max_v2f64_fast (v128) -> (f64) ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 -; SIMD128-NEXT: f64x2.pmax $push1=, $0, $pop0 +; SIMD128-NEXT: f64x2.max $push1=, $0, $pop0 ; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0 ; SIMD128-NEXT: return $pop2 %res = tail call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %arg) @@ -820,10 +820,10 @@ define float @pairwise_max_v4f32_fast(<4 x float> %arg) { ; SIMD128: .functype pairwise_max_v4f32_fast (v128) -> (f32) ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3 -; SIMD128-NEXT: f32x4.pmax $push5=, $0, $pop0 +; SIMD128-NEXT: f32x4.max $push5=, $0, $pop0 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 -; SIMD128-NEXT: f32x4.pmax $push2=, $pop4, $pop1 +; SIMD128-NEXT: f32x4.max $push2=, $pop4, $pop1 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0 ; SIMD128-NEXT: return $pop3 %res = tail call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %arg) @@ -863,7 +863,7 @@ define double @pairwise_min_v2f64_fast(<2 x double> %arg) { ; SIMD128: .functype pairwise_min_v2f64_fast (v128) -> (f64) ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 -; SIMD128-NEXT: f64x2.pmin $push1=, $0, $pop0 +; SIMD128-NEXT: f64x2.min $push1=, $0, $pop0 ; SIMD128-NEXT: f64x2.extract_lane $push2=, $pop1, 0 ; SIMD128-NEXT: return $pop2 %res = tail call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %arg) @@ -891,10 +891,10 @@ define float @pairwise_min_v4f32_fast(<4 x float> %arg) { ; SIMD128: .functype 
pairwise_min_v4f32_fast (v128) -> (f32) ; SIMD128-NEXT: # %bb.0: ; SIMD128-NEXT: i8x16.shuffle $push0=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 0, 1, 2, 3 -; SIMD128-NEXT: f32x4.pmin $push5=, $0, $pop0 +; SIMD128-NEXT: f32x4.min $push5=, $0, $pop0 ; SIMD128-NEXT: local.tee $push4=, $0=, $pop5 ; SIMD128-NEXT: i8x16.shuffle $push1=, $0, $0, 4, 5, 6, 7, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 -; SIMD128-NEXT: f32x4.pmin $push2=, $pop4, $pop1 +; SIMD128-NEXT: f32x4.min $push2=, $pop4, $pop1 ; SIMD128-NEXT: f32x4.extract_lane $push3=, $pop2, 0 ; SIMD128-NEXT: return $pop3 %res = tail call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %arg)