Skip to content

Commit 3a98934

Browse files
[AArch64] Treat @llvm.ssub.sat the same as @llvm.aarch64.neon.sqsub (#140454)
Fixes #94463
Co-authored-by: Spencer Abson <spencer.abson@arm.com>
1 parent 3b6ff59 commit 3a98934

File tree

6 files changed

+258
-69
lines changed

6 files changed

+258
-69
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5989,6 +5989,26 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
59895989
DAG.getNode(
59905990
AArch64ISD::URSHR_I, dl, Op.getOperand(1).getValueType(), Op.getOperand(1), Op.getOperand(2)));
59915991
return SDValue();
5992+
case Intrinsic::aarch64_neon_sqadd:
5993+
if (Op.getValueType().isVector())
5994+
return DAG.getNode(ISD::SADDSAT, dl, Op.getValueType(), Op.getOperand(1),
5995+
Op.getOperand(2));
5996+
return SDValue();
5997+
case Intrinsic::aarch64_neon_sqsub:
5998+
if (Op.getValueType().isVector())
5999+
return DAG.getNode(ISD::SSUBSAT, dl, Op.getValueType(), Op.getOperand(1),
6000+
Op.getOperand(2));
6001+
return SDValue();
6002+
case Intrinsic::aarch64_neon_uqadd:
6003+
if (Op.getValueType().isVector())
6004+
return DAG.getNode(ISD::UADDSAT, dl, Op.getValueType(), Op.getOperand(1),
6005+
Op.getOperand(2));
6006+
return SDValue();
6007+
case Intrinsic::aarch64_neon_uqsub:
6008+
if (Op.getValueType().isVector())
6009+
return DAG.getNode(ISD::USUBSAT, dl, Op.getValueType(), Op.getOperand(1),
6010+
Op.getOperand(2));
6011+
return SDValue();
59926012
case Intrinsic::aarch64_sve_whilelt:
59936013
return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
59946014
/*IsEqual=*/false);

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6256,24 +6256,6 @@ multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
62566256
[(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
62576257
}
62586258

6259-
multiclass SIMDThreeSameVectorExtraPatterns<string inst, SDPatternOperator OpNode> {
6260-
def : Pat<(v8i8 (OpNode V64:$LHS, V64:$RHS)),
6261-
(!cast<Instruction>(inst#"v8i8") V64:$LHS, V64:$RHS)>;
6262-
def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
6263-
(!cast<Instruction>(inst#"v4i16") V64:$LHS, V64:$RHS)>;
6264-
def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
6265-
(!cast<Instruction>(inst#"v2i32") V64:$LHS, V64:$RHS)>;
6266-
6267-
def : Pat<(v16i8 (OpNode V128:$LHS, V128:$RHS)),
6268-
(!cast<Instruction>(inst#"v16i8") V128:$LHS, V128:$RHS)>;
6269-
def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
6270-
(!cast<Instruction>(inst#"v8i16") V128:$LHS, V128:$RHS)>;
6271-
def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
6272-
(!cast<Instruction>(inst#"v4i32") V128:$LHS, V128:$RHS)>;
6273-
def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
6274-
(!cast<Instruction>(inst#"v2i64") V128:$LHS, V128:$RHS)>;
6275-
}
6276-
62776259
// As above, but D sized elements unsupported.
62786260
multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
62796261
SDPatternOperator OpNode> {
@@ -9861,14 +9843,15 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
98619843
}
98629844

98639845
multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
9864-
SDPatternOperator Accum> {
9846+
SDPatternOperator VecAcc,
9847+
SDPatternOperator ScalAcc> {
98659848
def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
98669849
V128, V64,
98679850
V128_lo, VectorIndexH,
98689851
asm, ".4s", ".4s", ".4h", ".h",
98699852
[(set (v4i32 V128:$dst),
9870-
(Accum (v4i32 V128:$Rd),
9871-
(v4i32 (int_aarch64_neon_sqdmull
9853+
(VecAcc (v4i32 V128:$Rd),
9854+
(v4i32 (int_aarch64_neon_sqdmull
98729855
(v4i16 V64:$Rn),
98739856
(dup_v8i16 (v8i16 V128_lo:$Rm),
98749857
VectorIndexH:$idx)))))]> {
@@ -9883,8 +9866,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
98839866
V128_lo, VectorIndexH,
98849867
asm#"2", ".4s", ".4s", ".8h", ".h",
98859868
[(set (v4i32 V128:$dst),
9886-
(Accum (v4i32 V128:$Rd),
9887-
(v4i32 (int_aarch64_neon_sqdmull
9869+
(VecAcc (v4i32 V128:$Rd),
9870+
(v4i32 (int_aarch64_neon_sqdmull
98889871
(extract_high_v8i16 (v8i16 V128:$Rn)),
98899872
(extract_high_dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))))]> {
98909873
bits<3> idx;
@@ -9898,8 +9881,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
98989881
V128, VectorIndexS,
98999882
asm, ".2d", ".2d", ".2s", ".s",
99009883
[(set (v2i64 V128:$dst),
9901-
(Accum (v2i64 V128:$Rd),
9902-
(v2i64 (int_aarch64_neon_sqdmull
9884+
(VecAcc (v2i64 V128:$Rd),
9885+
(v2i64 (int_aarch64_neon_sqdmull
99039886
(v2i32 V64:$Rn),
99049887
(dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
99059888
bits<2> idx;
@@ -9912,8 +9895,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99129895
V128, VectorIndexS,
99139896
asm#"2", ".2d", ".2d", ".4s", ".s",
99149897
[(set (v2i64 V128:$dst),
9915-
(Accum (v2i64 V128:$Rd),
9916-
(v2i64 (int_aarch64_neon_sqdmull
9898+
(VecAcc (v2i64 V128:$Rd),
9899+
(v2i64 (int_aarch64_neon_sqdmull
99179900
(extract_high_v4i32 (v4i32 V128:$Rn)),
99189901
(extract_high_dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
99199902
bits<2> idx;
@@ -9930,8 +9913,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99309913
let Inst{20} = idx{0};
99319914
}
99329915

9933-
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
9934-
(i32 (vector_extract
9916+
def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
9917+
(i32 (vector_extract
99359918
(v4i32 (int_aarch64_neon_sqdmull
99369919
(v4i16 V64:$Rn),
99379920
(v4i16 V64:$Rm))),
@@ -9942,8 +9925,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99429925
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Rm, dsub),
99439926
(i64 0))>;
99449927

9945-
def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
9946-
(i32 (vector_extract
9928+
def : Pat<(i32 (ScalAcc (i32 FPR32Op:$Rd),
9929+
(i32 (vector_extract
99479930
(v4i32 (int_aarch64_neon_sqdmull
99489931
(v4i16 V64:$Rn),
99499932
(dup_v8i16 (v8i16 V128_lo:$Rm),
@@ -9959,8 +9942,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
99599942
FPR64Op, FPR32Op, V128, VectorIndexS,
99609943
asm, ".s", "", "", ".s",
99619944
[(set (i64 FPR64Op:$dst),
9962-
(Accum (i64 FPR64Op:$Rd),
9963-
(i64 (int_aarch64_neon_sqdmulls_scalar
9945+
(ScalAcc (i64 FPR64Op:$Rd),
9946+
(i64 (int_aarch64_neon_sqdmulls_scalar
99649947
(i32 FPR32Op:$Rn),
99659948
(i32 (vector_extract (v4i32 V128:$Rm),
99669949
VectorIndexS:$idx))))))]> {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5968,12 +5968,12 @@ defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp
59685968
defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
59695969
defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
59705970
defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
5971-
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
5971+
defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", saddsat>;
59725972
defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
59735973
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
59745974
defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
59755975
defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
5976-
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
5976+
defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", ssubsat>;
59775977
defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
59785978
defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
59795979
defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
@@ -5987,10 +5987,10 @@ defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp
59875987
defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
59885988
defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
59895989
defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5990-
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5990+
defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", uaddsat>;
59915991
defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
59925992
defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5993-
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5993+
defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", usubsat>;
59945994
defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
59955995
defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
59965996
defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
@@ -5999,12 +5999,6 @@ defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
59995999
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
60006000
int_aarch64_neon_sqrdmlsh>;
60016001

6002-
// Extra saturate patterns, other than the intrinsics matches above
6003-
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
6004-
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
6005-
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
6006-
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
6007-
60086002
defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
60096003
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
60106004
BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
@@ -6720,10 +6714,8 @@ defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
67206714
defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
67216715
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
67226716
defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
6723-
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
6724-
int_aarch64_neon_sqadd>;
6725-
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
6726-
int_aarch64_neon_sqsub>;
6717+
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", saddsat>;
6718+
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", ssubsat>;
67276719
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
67286720
int_aarch64_neon_sqdmull>;
67296721
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
@@ -8282,9 +8274,9 @@ defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
82828274
defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
82838275
TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
82848276
defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
8285-
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
8277+
defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", saddsat,
82868278
int_aarch64_neon_sqadd>;
8287-
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
8279+
defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ssubsat,
82888280
int_aarch64_neon_sqsub>;
82898281
defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
82908282
int_aarch64_neon_sqrdmlah>;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1622,8 +1622,10 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
16221622

16231623
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16241624
MachineInstr &MI) const {
1625-
auto LowerBinOp = [&MI](unsigned Opcode) {
1626-
MachineIRBuilder MIB(MI);
1625+
MachineIRBuilder &MIB = Helper.MIRBuilder;
1626+
MachineRegisterInfo &MRI = *MIB.getMRI();
1627+
1628+
auto LowerBinOp = [&MI, &MIB](unsigned Opcode) {
16271629
MIB.buildInstr(Opcode, {MI.getOperand(0)},
16281630
{MI.getOperand(2), MI.getOperand(3)});
16291631
MI.eraseFromParent();
@@ -1642,7 +1644,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16421644
MachineFunction &MF = *MI.getMF();
16431645
auto Val = MF.getRegInfo().createGenericVirtualRegister(
16441646
LLT::scalar(VaListSize * 8));
1645-
MachineIRBuilder MIB(MI);
16461647
MIB.buildLoad(Val, MI.getOperand(2),
16471648
*MF.getMachineMemOperand(MachinePointerInfo(),
16481649
MachineMemOperand::MOLoad,
@@ -1655,7 +1656,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16551656
return true;
16561657
}
16571658
case Intrinsic::get_dynamic_area_offset: {
1658-
MachineIRBuilder &MIB = Helper.MIRBuilder;
16591659
MIB.buildConstant(MI.getOperand(0).getReg(), 0);
16601660
MI.eraseFromParent();
16611661
return true;
@@ -1664,14 +1664,12 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16641664
assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
16651665
// Anyext the value being set to 64 bit (only the bottom 8 bits are read by
16661666
// the instruction).
1667-
MachineIRBuilder MIB(MI);
16681667
auto &Value = MI.getOperand(3);
16691668
Register ExtValueReg = MIB.buildAnyExt(LLT::scalar(64), Value).getReg(0);
16701669
Value.setReg(ExtValueReg);
16711670
return true;
16721671
}
16731672
case Intrinsic::aarch64_prefetch: {
1674-
MachineIRBuilder MIB(MI);
16751673
auto &AddrVal = MI.getOperand(1);
16761674

16771675
int64_t IsWrite = MI.getOperand(2).getImm();
@@ -1694,8 +1692,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
16941692
case Intrinsic::aarch64_neon_smaxv:
16951693
case Intrinsic::aarch64_neon_uminv:
16961694
case Intrinsic::aarch64_neon_sminv: {
1697-
MachineIRBuilder MIB(MI);
1698-
MachineRegisterInfo &MRI = *MIB.getMRI();
16991695
bool IsSigned = IntrinsicID == Intrinsic::aarch64_neon_saddv ||
17001696
IntrinsicID == Intrinsic::aarch64_neon_smaxv ||
17011697
IntrinsicID == Intrinsic::aarch64_neon_sminv;
@@ -1720,8 +1716,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
17201716
}
17211717
case Intrinsic::aarch64_neon_uaddlp:
17221718
case Intrinsic::aarch64_neon_saddlp: {
1723-
MachineIRBuilder MIB(MI);
1724-
17251719
unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlp
17261720
? AArch64::G_UADDLP
17271721
: AArch64::G_SADDLP;
@@ -1732,9 +1726,6 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
17321726
}
17331727
case Intrinsic::aarch64_neon_uaddlv:
17341728
case Intrinsic::aarch64_neon_saddlv: {
1735-
MachineIRBuilder MIB(MI);
1736-
MachineRegisterInfo &MRI = *MIB.getMRI();
1737-
17381729
unsigned Opc = IntrinsicID == Intrinsic::aarch64_neon_uaddlv
17391730
? AArch64::G_UADDLV
17401731
: AArch64::G_SADDLV;
@@ -1790,11 +1781,30 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
17901781
return LowerBinOp(AArch64::G_UMULL);
17911782
case Intrinsic::aarch64_neon_abs: {
17921783
// Lower the intrinsic to G_ABS.
1793-
MachineIRBuilder MIB(MI);
17941784
MIB.buildInstr(TargetOpcode::G_ABS, {MI.getOperand(0)}, {MI.getOperand(2)});
17951785
MI.eraseFromParent();
17961786
return true;
17971787
}
1788+
case Intrinsic::aarch64_neon_sqadd: {
1789+
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1790+
return LowerBinOp(TargetOpcode::G_SADDSAT);
1791+
break;
1792+
}
1793+
case Intrinsic::aarch64_neon_sqsub: {
1794+
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1795+
return LowerBinOp(TargetOpcode::G_SSUBSAT);
1796+
break;
1797+
}
1798+
case Intrinsic::aarch64_neon_uqadd: {
1799+
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1800+
return LowerBinOp(TargetOpcode::G_UADDSAT);
1801+
break;
1802+
}
1803+
case Intrinsic::aarch64_neon_uqsub: {
1804+
if (MRI.getType(MI.getOperand(0).getReg()).isVector())
1805+
return LowerBinOp(TargetOpcode::G_USUBSAT);
1806+
break;
1807+
}
17981808

17991809
case Intrinsic::vector_reverse:
18001810
// TODO: Add support for vector_reverse

llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2539,16 +2539,16 @@ define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coer
25392539
; CHECK: // %bb.0: // %entry
25402540
; CHECK-NEXT: lsr x8, x0, #16
25412541
; CHECK-NEXT: movi v1.2d, #0xffff0000ffff0000
2542-
; CHECK-NEXT: fmov d5, x0
25432542
; CHECK-NEXT: rev32 v4.8h, v0.8h
25442543
; CHECK-NEXT: dup v2.8h, w8
25452544
; CHECK-NEXT: sqneg v3.8h, v2.8h
25462545
; CHECK-NEXT: bsl v1.16b, v2.16b, v3.16b
2547-
; CHECK-NEXT: sqdmull v2.4s, v0.4h, v5.h[0]
2548-
; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v5.h[0]
2549-
; CHECK-NEXT: sqdmlal v2.4s, v4.4h, v1.4h
2550-
; CHECK-NEXT: sqdmlal2 v0.4s, v4.8h, v1.8h
2551-
; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
2546+
; CHECK-NEXT: fmov d3, x0
2547+
; CHECK-NEXT: sqdmull v2.4s, v4.4h, v1.4h
2548+
; CHECK-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h
2549+
; CHECK-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0]
2550+
; CHECK-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0]
2551+
; CHECK-NEXT: uzp2 v0.8h, v2.8h, v1.8h
25522552
; CHECK-NEXT: ret
25532553
entry:
25542554
%scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16

0 commit comments

Comments
 (0)