Skip to content

Commit fc44693

Browse files
committed
[X86] detectAVGPattern - accept non-pow2 vectors by padding.
Drop the pow2 vector limitation for AVG generation by padding the vector to the next pow2, creating the PAVG nodes and then extracting the final subvector. Fixes some poor codegen that has been annoying me for years.
1 parent bccd2ec commit fc44693

File tree

2 files changed

+121
-604
lines changed

2 files changed

+121
-604
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37860,7 +37860,8 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3786037860
case X86ISD::UNPCKL:
3786137861
case X86ISD::UNPCKH:
3786237862
case X86ISD::BLENDI:
37863-
// Saturated Packs.
37863+
// Integer ops.
37864+
case X86ISD::AVG:
3786437865
case X86ISD::PACKSS:
3786537866
case X86ISD::PACKUS:
3786637867
// Horizontal Ops.
@@ -44183,8 +44184,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
4418344184
unsigned NumElems = VT.getVectorNumElements();
4418444185

4418544186
EVT ScalarVT = VT.getVectorElementType();
44186-
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) &&
44187-
NumElems >= 2 && isPowerOf2_32(NumElems)))
44187+
if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
4418844188
return SDValue();
4418944189

4419044190
// InScalarVT is the intermediate type in AVG pattern and it should be greater
@@ -44235,6 +44235,29 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
4423544235
return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
4423644236
};
4423744237

44238+
auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
44239+
// Pad to a power-of-2 vector, split+apply and extract the original vector.
44240+
unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
44241+
EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
44242+
if (NumElemsPow2 != NumElems) {
44243+
SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
44244+
SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
44245+
for (unsigned i = 0; i != NumElems; ++i) {
44246+
SDValue Idx = DAG.getIntPtrConstant(i, DL);
44247+
Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
44248+
Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
44249+
}
44250+
Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
44251+
Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
44252+
}
44253+
SDValue Res =
44254+
SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
44255+
if (NumElemsPow2 == NumElems)
44256+
return Res;
44257+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
44258+
DAG.getIntPtrConstant(0, DL));
44259+
};
44260+
4423844261
// Take care of the case when one of the operands is a constant vector whose
4423944262
// element is in the range [1, 256] (i8) or [1, 65536] (i16).
4424044263
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
@@ -44245,9 +44268,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
4424544268
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
4424644269
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
4424744270
Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
44248-
return SplitOpsAndApply(DAG, Subtarget, DL, VT,
44249-
{ Operands[0].getOperand(0), Operands[1] },
44250-
AVGBuilder);
44271+
return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
4425144272
}
4425244273

4425344274
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
@@ -44294,8 +44315,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
4429444315
}
4429544316

4429644317
// The pattern is detected, emit X86ISD::AVG instruction(s).
44297-
return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
44298-
AVGBuilder);
44318+
return AVGSplitter(Operands[0], Operands[1]);
4429944319
}
4430044320

4430144321
return SDValue();

0 commit comments

Comments
 (0)