@@ -37860,7 +37860,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
37860
37860
case X86ISD::UNPCKL:
37861
37861
case X86ISD::UNPCKH:
37862
37862
case X86ISD::BLENDI:
37863
- // Saturated Packs.
37863
+ // Integer ops.
37864
+ case X86ISD::AVG:
37864
37865
case X86ISD::PACKSS:
37865
37866
case X86ISD::PACKUS:
37866
37867
// Horizontal Ops.
@@ -44183,8 +44184,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
44183
44184
unsigned NumElems = VT.getVectorNumElements();
44184
44185
44185
44186
EVT ScalarVT = VT.getVectorElementType();
44186
- if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
44187
- NumElems >= 2 && isPowerOf2_32(NumElems)))
44187
+ if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
44188
44188
return SDValue();
44189
44189
44190
44190
// InScalarVT is the intermediate type in AVG pattern and it should be greater
@@ -44235,6 +44235,29 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
44235
44235
return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
44236
44236
};
44237
44237
44238
// AVGSplitter: emits the AVG node(s) for the operand pair (Op0, Op1), also
// handling the case where NumElems is not a power of two. Both operands are
// widened to PowerOf2Ceil(NumElems) elements, the widened pair is passed to
// SplitOpsAndApply with AVGBuilder, and the low part of the result is
// extracted back out as VT.
// NOTE(review): SplitOpsAndApply is defined outside this view; presumably it
// needs a power-of-2 element count to split the operands evenly - confirm.
+ auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
44239
+ // Pad to a power-of-2 vector, split+apply and extract the original vector.
44240
+ unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
44241
+ EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
// Only rebuild the operands when padding is actually needed.
44242
+ if (NumElemsPow2 != NumElems) {
// Every lane starts out as an undef scalar; only the first NumElems lanes are
// overwritten below, so the padding lanes stay undef.
44243
+ SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
44244
+ SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
// Copy lane i of each original operand into lane i of the padded list.
44245
+ for (unsigned i = 0; i != NumElems; ++i) {
44246
+ SDValue Idx = DAG.getIntPtrConstant(i, DL);
44247
+ Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
44248
+ Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
44249
+ }
// Replace the operands with build-vectors of the padded type.
44250
+ Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
44251
+ Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
44252
+ }
// Apply AVGBuilder (which creates X86ISD::AVG) on the power-of-2 typed pair.
44253
+ SDValue Res =
44254
+ SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
// No padding was added: the result is returned as-is.
44255
+ if (NumElemsPow2 == NumElems)
44256
+ return Res;
// Padding was added: take the subvector of type VT starting at index 0, which
// drops the extra lanes.
44257
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
44258
+ DAG.getIntPtrConstant(0, DL));
44259
+ };
44260
+
44238
44261
// Take care of the case when one of the operands is a constant vector whose
44239
44262
// element is in the range [1, 256] (i8) or [1, 65536] (i16).
44240
44263
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
@@ -44245,9 +44268,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
44245
44268
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
44246
44269
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
44247
44270
Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
44248
- return SplitOpsAndApply(DAG, Subtarget, DL, VT,
44249
- { Operands[0].getOperand(0), Operands[1] },
44250
- AVGBuilder);
44271
+ return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
44251
44272
}
44252
44273
44253
44274
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
@@ -44294,8 +44315,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
44294
44315
}
44295
44316
44296
44317
// The pattern is detected, emit X86ISD::AVG instruction(s).
44297
- return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
44298
- AVGBuilder);
44318
+ return AVGSplitter(Operands[0], Operands[1]);
44299
44319
}
44300
44320
44301
44321
return SDValue();
0 commit comments