Skip to content

Commit 5d84ccb

Browse files
committed
Prepare for multi-exit LFTR [NFC]
This change does the plumbing to wire an ExitingBB parameter through the LFTR implementation, and reorganizes the code to work in terms of a set of individual loop exits. Most of it is fairly obvious, but there's one key complexity which makes it worthy of consideration. The actual multi-exit LFTR patch is in D62625 for context. Specifically, it turns out the existing code uses the backedge taken count from before a IV is widened. Oddly, we can end up with a different (more expensive, but semantically equivelent) BE count for the loop when requerying after widening. For the nestedIV example from elim-extend, we end up with the following BE counts: BEFORE: (-2 + (-1 * %innercount) + %limit) AFTER: (-1 + (sext i32 (-1 + %limit) to i64) + (-1 * (sext i32 %innercount to i64))<nsw>) This is the only test in tree which seems sensitive to this difference. The actual result of using the wider BETC on this example is that we actually produce slightly better code. :) In review, we decided to accept that test change. This patch is structured to preserve the old behavior, but a separate change will immediate follow with the behavior change. (I wanted it separate for problem attribution purposes.) Differential Revision: https://reviews.llvm.org/D62880 llvm-svn: 362971
1 parent a5f2c20 commit 5d84ccb

File tree

1 file changed

+77
-65
lines changed

1 file changed

+77
-65
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 77 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ class IndVarSimplify {
146146
bool rewriteFirstIterationLoopExitValues(Loop *L);
147147
bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
148148

149-
bool linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
149+
bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
150+
const SCEV *BackedgeTakenCount,
150151
PHINode *IndVar, SCEVExpander &Rewriter);
151152

152153
bool sinkUnusedInvariants(Loop *L);
@@ -1979,33 +1980,6 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
19791980
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
19801981
//===----------------------------------------------------------------------===//
19811982

1982-
/// Return true if this loop's backedge taken count expression can be safely and
1983-
/// cheaply expanded into an instruction sequence that can be used by
1984-
/// linearFunctionTestReplace.
1985-
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE,
1986-
SCEVExpander &Rewriter) {
1987-
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
1988-
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1989-
return false;
1990-
1991-
// Better to break the backedge
1992-
if (BackedgeTakenCount->isZero())
1993-
return false;
1994-
1995-
// Loops with multiple exits are not currently suported by lftr
1996-
if (!L->getExitingBlock())
1997-
return false;
1998-
1999-
// Can't rewrite non-branch yet.
2000-
if (!isa<BranchInst>(L->getExitingBlock()->getTerminator()))
2001-
return false;
2002-
2003-
if (Rewriter.isHighCostExpansion(BackedgeTakenCount, L))
2004-
return false;
2005-
2006-
return true;
2007-
}
2008-
20091983
/// Given an Value which is hoped to be part of an add recurance in the given
20101984
/// loop, return the associated Phi node if so. Otherwise, return null. Note
20111985
/// that this is less general than SCEVs AddRec checking.
@@ -2048,25 +2022,24 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
20482022
/// Given a loop with one backedge and one exit, return the ICmpInst
20492023
/// controlling the sole loop exit. There is no guarantee that the exiting
20502024
/// block is also the latch.
2051-
static ICmpInst *getLoopTest(Loop *L) {
2052-
assert(L->getExitingBlock() && "expected loop exit");
2025+
static ICmpInst *getLoopTest(Loop *L, BasicBlock *ExitingBB) {
20532026

20542027
BasicBlock *LatchBlock = L->getLoopLatch();
20552028
// Don't bother with LFTR if the loop is not properly simplified.
20562029
if (!LatchBlock)
20572030
return nullptr;
20582031

2059-
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
2032+
BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
20602033
assert(BI && "expected exit branch");
20612034

20622035
return dyn_cast<ICmpInst>(BI->getCondition());
20632036
}
20642037

20652038
/// linearFunctionTestReplace policy. Return true unless we can show that the
20662039
/// current exit test is already sufficiently canonical.
2067-
static bool needsLFTR(Loop *L) {
2040+
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
20682041
// Do LFTR to simplify the exit condition to an ICMP.
2069-
ICmpInst *Cond = getLoopTest(L);
2042+
ICmpInst *Cond = getLoopTest(L, ExitingBB);
20702043
if (!Cond)
20712044
return true;
20722045

@@ -2188,12 +2161,11 @@ static bool isLoopCounter(PHINode* Phi, Loop *L,
21882161
/// BECount may be an i8* pointer type. The pointer difference is already
21892162
/// valid count without scaling the address stride, so it remains a pointer
21902163
/// expression as far as SCEV is concerned.
2191-
static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2192-
ScalarEvolution *SE) {
2164+
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2165+
const SCEV *BECount, ScalarEvolution *SE) {
21932166
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
21942167

2195-
Value *Cond =
2196-
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
2168+
Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
21972169

21982170
// Loop over all of the PHI nodes, looking for a simple counter.
21992171
PHINode *BestPhi = nullptr;
@@ -2226,13 +2198,15 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
22262198
// We explicitly allow unknown phis as long as they are already used by
22272199
// the loop test. In this case we assume that performing LFTR could not
22282200
// increase the number of undef users.
2229-
if (ICmpInst *Cond = getLoopTest(L)) {
2230-
if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2231-
Phi != getLoopPhiForCounter(Cond->getOperand(1), L)) {
2232-
continue;
2233-
}
2234-
}
2201+
// TODO: Generalize this to allow *any* loop exit which is known to
2202+
// execute on each iteration
2203+
if (L->getExitingBlock())
2204+
if (ICmpInst *Cond = getLoopTest(L, ExitingBB))
2205+
if (Phi != getLoopPhiForCounter(Cond->getOperand(0), L) &&
2206+
Phi != getLoopPhiForCounter(Cond->getOperand(1), L))
2207+
continue;
22352208
}
2209+
22362210
const SCEV *Init = AR->getStart();
22372211

22382212
if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2261,7 +2235,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
22612235
/// Insert an IR expression which computes the value held by the IV IndVar
22622236
/// (which must be an loop counter w/unit stride) after the backedge of loop L
22632237
/// is taken IVCount times.
2264-
static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2238+
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2239+
const SCEV *IVCount, Loop *L,
22652240
SCEVExpander &Rewriter, ScalarEvolution *SE) {
22662241
assert(isLoopCounter(IndVar, L, SE));
22672242
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -2284,13 +2259,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
22842259
// Expand the code for the iteration count.
22852260
assert(SE->isLoopInvariant(IVOffset, L) &&
22862261
"Computed iteration count is not loop invariant!");
2287-
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2262+
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
22882263
Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
22892264

22902265
Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
22912266
assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
22922267
// We could handle pointer IVs other than i8*, but we need to compensate for
2293-
// gep index scaling. See canExpandBackedgeTakenCount comments.
2268+
// gep index scaling.
22942269
assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
22952270
cast<PointerType>(GEPBase->getType())
22962271
->getElementType())->isOne() &&
@@ -2328,7 +2303,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
23282303
IVLimit = SE->getAddExpr(IVInit, IVCount);
23292304
}
23302305
// Expand the code for the iteration count.
2331-
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2306+
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
23322307
IRBuilder<> Builder(BI);
23332308
assert(SE->isLoopInvariant(IVLimit, L) &&
23342309
"Computed iteration count is not loop invariant!");
@@ -2347,10 +2322,9 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
23472322
/// determine a loop-invariant trip count of the loop, which is actually a much
23482323
/// broader range than just linear tests.
23492324
bool IndVarSimplify::
2350-
linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2325+
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2326+
const SCEV *BackedgeTakenCount,
23512327
PHINode *IndVar, SCEVExpander &Rewriter) {
2352-
assert(canExpandBackedgeTakenCount(L, SE, Rewriter) && "precondition");
2353-
23542328
// Initialize CmpIndVar and IVCount to their preincremented values.
23552329
Value *CmpIndVar = IndVar;
23562330
const SCEV *IVCount = BackedgeTakenCount;
@@ -2360,7 +2334,7 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
23602334
// If the exiting block is the same as the backedge block, we prefer to
23612335
// compare against the post-incremented value, otherwise we must compare
23622336
// against the preincremented value.
2363-
if (L->getExitingBlock() == L->getLoopLatch()) {
2337+
if (ExitingBB == L->getLoopLatch()) {
23642338
// Add one to the "backedge-taken" count to get the trip count.
23652339
// This addition may overflow, which is valid as long as the comparison is
23662340
// truncated to BackedgeTakenCount->getType().
@@ -2369,7 +2343,7 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
23692343
// The BackedgeTaken expression contains the number of times that the
23702344
// backedge branches to the loop header. This is one less than the
23712345
// number of times the loop executes, so use the incremented indvar.
2372-
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
2346+
CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
23732347
}
23742348

23752349
// It may be necessary to drop nowrap flags on the incrementing instruction
@@ -2393,13 +2367,13 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
23932367
BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
23942368
}
23952369

2396-
Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
2370+
Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, IVCount, L, Rewriter, SE);
23972371
assert(ExitCnt->getType()->isPointerTy() ==
23982372
IndVar->getType()->isPointerTy() &&
23992373
"genLoopLimit missed a cast");
24002374

24012375
// Insert a new icmp_ne or icmp_eq instruction before the branch.
2402-
BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
2376+
BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
24032377
ICmpInst::Predicate P;
24042378
if (L->contains(BI->getSuccessor(0)))
24052379
P = ICmpInst::ICMP_NE;
@@ -2645,22 +2619,60 @@ bool IndVarSimplify::run(Loop *L) {
26452619
NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
26462620

26472621
// If we have a trip count expression, rewrite the loop's exit condition
2648-
// using it. We can currently only handle loops with a single exit.
2649-
if (!DisableLFTR && canExpandBackedgeTakenCount(L, SE, Rewriter) &&
2650-
needsLFTR(L)) {
2651-
PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE);
2652-
if (IndVar) {
2622+
// using it.
2623+
if (!DisableLFTR) {
2624+
// For the moment, we only do LFTR for single exit loops. The code is
2625+
// structured as it is in the expectation of generalization to multi-exit
2626+
// loops in the near future. See D62625 for context.
2627+
SmallVector<BasicBlock*, 16> ExitingBlocks;
2628+
if (auto *ExitingBB = L->getExitingBlock())
2629+
ExitingBlocks.push_back(ExitingBB);
2630+
for (BasicBlock *ExitingBB : ExitingBlocks) {
2631+
// Can't rewrite non-branch yet.
2632+
if (!isa<BranchInst>(ExitingBB->getTerminator()))
2633+
continue;
2634+
2635+
if (!needsLFTR(L, ExitingBB))
2636+
continue;
2637+
2638+
// Note: This block of code is here strictly to seperate an change into
2639+
// two parts: one NFC, one not. What's happening here is that SCEV is
2640+
// returning a more expensive expression for the BackedgeTakenCount for
2641+
// the loop after widening in rare circumstances. In review, we decided
2642+
// to accept that small difference - since it has minimal test suite
2643+
// impact - but for ease of attribution, the functional diff will be it's
2644+
// own change.
2645+
const SCEV *BETakenCount = L->getExitingBlock() ?
2646+
BackedgeTakenCount : SE->getExitCount(L, ExitingBB);
2647+
if (isa<SCEVCouldNotCompute>(BETakenCount))
2648+
continue;
2649+
2650+
// Better to fold to true (TODO: do so!)
2651+
if (BETakenCount->isZero())
2652+
continue;
2653+
2654+
PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE);
2655+
if (!IndVar)
2656+
continue;
2657+
2658+
// Avoid high cost expansions. Note: This heuristic is questionable in
2659+
// that our definition of "high cost" is not exactly principled.
2660+
if (Rewriter.isHighCostExpansion(BETakenCount, L))
2661+
continue;
2662+
26532663
// Check preconditions for proper SCEVExpander operation. SCEV does not
2654-
// express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2655-
// pass that uses the SCEVExpander must do it. This does not work well for
2656-
// loop passes because SCEVExpander makes assumptions about all loops,
2657-
// while LoopPassManager only forces the current loop to be simplified.
2664+
// express SCEVExpander's dependencies, such as LoopSimplify. Instead
2665+
// any pass that uses the SCEVExpander must do it. This does not work
2666+
// well for loop passes because SCEVExpander makes assumptions about
2667+
// all loops, while LoopPassManager only forces the current loop to be
2668+
// simplified.
26582669
//
26592670
// FIXME: SCEV expansion has no way to bail out, so the caller must
26602671
// explicitly check any assumptions made by SCEV. Brittle.
2661-
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
2672+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount);
26622673
if (!AR || AR->getLoop()->getLoopPreheader())
2663-
Changed |= linearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
2674+
Changed |= linearFunctionTestReplace(L, ExitingBB,
2675+
BETakenCount, IndVar,
26642676
Rewriter);
26652677
}
26662678
}

0 commit comments

Comments
 (0)