-
Notifications
You must be signed in to change notification settings - Fork 13.7k
[LV] Reduce register usage for scaled reductions #133090
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
a994bf2
82151a8
f5d5ed6
54af7d2
b6b9063
a695124
f05ebee
ff432c9
0f68427
8427362
b905806
5f14165
296e3ce
c38fcab
e6061f2
f1e7e9b
bde39b4
f58b5e1
aefea41
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,11 +64,10 @@ static cl::opt<bool> PrintVPlansInDotFormat( | |
#define DEBUG_TYPE "loop-vectorize" | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { | ||
const VPInstruction *Instr = dyn_cast<VPInstruction>(&V); | ||
VPSlotTracker SlotTracker( | ||
(Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); | ||
V.print(OS, SlotTracker); | ||
raw_ostream &llvm::operator<<(raw_ostream &OS, const VPRecipeBase &R) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of adding this operator, you can instead replace the one above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
const VPBasicBlock *Parent = R.getParent(); | ||
VPSlotTracker SlotTracker(Parent ? Parent->getPlan() : nullptr); | ||
R.print(OS, "", SlotTracker); | ||
return OS; | ||
} | ||
#endif | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2033,6 +2033,9 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, | |
/// Generate the phi/select nodes. | ||
void execute(VPTransformState &State) override; | ||
|
||
/// Get the factor that the VF of this recipe's output should be scaled by. | ||
unsigned getVFScaleFactor() const { return VFScaleFactor; } | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps good to have comments on both new functions added? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
void print(raw_ostream &O, const Twine &Indent, | ||
|
@@ -2063,17 +2066,21 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe, | |
/// scalar value. | ||
class VPPartialReductionRecipe : public VPSingleDefRecipe { | ||
unsigned Opcode; | ||
/// The divisor by which the VF of this recipe's output should be divided | ||
/// during execution. | ||
unsigned VFScaleFactor; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wuld be good to add a comment here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
public: | ||
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, | ||
VPValue *Op1) | ||
VPValue *Op1, unsigned VFScaleFactor) | ||
: VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, | ||
ReductionInst) {} | ||
VFScaleFactor, ReductionInst) {} | ||
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, | ||
unsigned VFScaleFactor, | ||
Instruction *ReductionInst = nullptr) | ||
: VPSingleDefRecipe(VPDef::VPPartialReductionSC, | ||
ArrayRef<VPValue *>({Op0, Op1}), ReductionInst), | ||
Opcode(Opcode) { | ||
Opcode(Opcode), VFScaleFactor(VFScaleFactor) { | ||
[[maybe_unused]] auto *AccumulatorRecipe = | ||
getOperand(1)->getDefiningRecipe(); | ||
assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) || | ||
|
@@ -2084,7 +2091,7 @@ class VPPartialReductionRecipe : public VPSingleDefRecipe { | |
|
||
VPPartialReductionRecipe *clone() override { | ||
return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1), | ||
getUnderlyingInstr()); | ||
VFScaleFactor, getUnderlyingInstr()); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC) | ||
|
@@ -2099,6 +2106,9 @@ class VPPartialReductionRecipe : public VPSingleDefRecipe { | |
/// Get the binary op's opcode. | ||
unsigned getOpcode() const { return Opcode; } | ||
|
||
/// Get the factor that the VF of this recipe's output should be scaled by. | ||
unsigned getVFScaleFactor() const { return VFScaleFactor; } | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
void print(raw_ostream &O, const Twine &Indent, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: for the tests that check the debug output, can a check for this line be added?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately the VF that has partial reductions is pruned before the register usage is calculated. I can add one as part of #132190 once this is merged, though.