@@ -146,7 +146,8 @@ class IndVarSimplify {
146
146
bool rewriteFirstIterationLoopExitValues (Loop *L);
147
147
bool hasHardUserWithinLoop (const Loop *L, const Instruction *I) const ;
148
148
149
- bool linearFunctionTestReplace (Loop *L, const SCEV *BackedgeTakenCount,
149
+ bool linearFunctionTestReplace (Loop *L, BasicBlock *ExitingBB,
150
+ const SCEV *BackedgeTakenCount,
150
151
PHINode *IndVar, SCEVExpander &Rewriter);
151
152
152
153
bool sinkUnusedInvariants (Loop *L);
@@ -1979,33 +1980,6 @@ bool IndVarSimplify::simplifyAndExtend(Loop *L,
1979
1980
// linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
1980
1981
// ===----------------------------------------------------------------------===//
1981
1982
1982
- // / Return true if this loop's backedge taken count expression can be safely and
1983
- // / cheaply expanded into an instruction sequence that can be used by
1984
- // / linearFunctionTestReplace.
1985
- static bool canExpandBackedgeTakenCount (Loop *L, ScalarEvolution *SE,
1986
- SCEVExpander &Rewriter) {
1987
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount (L);
1988
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
1989
- return false ;
1990
-
1991
- // Better to break the backedge
1992
- if (BackedgeTakenCount->isZero ())
1993
- return false ;
1994
-
1995
- // Loops with multiple exits are not currently suported by lftr
1996
- if (!L->getExitingBlock ())
1997
- return false ;
1998
-
1999
- // Can't rewrite non-branch yet.
2000
- if (!isa<BranchInst>(L->getExitingBlock ()->getTerminator ()))
2001
- return false ;
2002
-
2003
- if (Rewriter.isHighCostExpansion (BackedgeTakenCount, L))
2004
- return false ;
2005
-
2006
- return true ;
2007
- }
2008
-
2009
1983
// / Given a Value which is hoped to be part of an add recurrence in the given
2010
1984
// / loop, return the associated Phi node if so. Otherwise, return null. Note
2011
1985
// / that this is less general than SCEVs AddRec checking.
@@ -2048,25 +2022,24 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2048
2022
// / Given a loop with one backedge and one exit, return the ICmpInst
2049
2023
// / controlling the sole loop exit. There is no guarantee that the exiting
2050
2024
// / block is also the latch.
2051
- static ICmpInst *getLoopTest (Loop *L) {
2052
- assert (L->getExitingBlock () && " expected loop exit" );
2025
+ static ICmpInst *getLoopTest (Loop *L, BasicBlock *ExitingBB) {
2053
2026
2054
2027
BasicBlock *LatchBlock = L->getLoopLatch ();
2055
2028
// Don't bother with LFTR if the loop is not properly simplified.
2056
2029
if (!LatchBlock)
2057
2030
return nullptr ;
2058
2031
2059
- BranchInst *BI = dyn_cast<BranchInst>(L-> getExitingBlock () ->getTerminator ());
2032
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBB ->getTerminator ());
2060
2033
assert (BI && " expected exit branch" );
2061
2034
2062
2035
return dyn_cast<ICmpInst>(BI->getCondition ());
2063
2036
}
2064
2037
2065
2038
// / linearFunctionTestReplace policy. Return true unless we can show that the
2066
2039
// / current exit test is already sufficiently canonical.
2067
- static bool needsLFTR (Loop *L) {
2040
+ static bool needsLFTR (Loop *L, BasicBlock *ExitingBB ) {
2068
2041
// Do LFTR to simplify the exit condition to an ICMP.
2069
- ICmpInst *Cond = getLoopTest (L);
2042
+ ICmpInst *Cond = getLoopTest (L, ExitingBB );
2070
2043
if (!Cond)
2071
2044
return true ;
2072
2045
@@ -2188,12 +2161,11 @@ static bool isLoopCounter(PHINode* Phi, Loop *L,
2188
2161
// / BECount may be an i8* pointer type. The pointer difference is already
2189
2162
// / valid count without scaling the address stride, so it remains a pointer
2190
2163
// / expression as far as SCEV is concerned.
2191
- static PHINode *FindLoopCounter (Loop *L, const SCEV *BECount ,
2192
- ScalarEvolution *SE) {
2164
+ static PHINode *FindLoopCounter (Loop *L, BasicBlock *ExitingBB ,
2165
+ const SCEV *BECount, ScalarEvolution *SE) {
2193
2166
uint64_t BCWidth = SE->getTypeSizeInBits (BECount->getType ());
2194
2167
2195
- Value *Cond =
2196
- cast<BranchInst>(L->getExitingBlock ()->getTerminator ())->getCondition ();
2168
+ Value *Cond = cast<BranchInst>(ExitingBB->getTerminator ())->getCondition ();
2197
2169
2198
2170
// Loop over all of the PHI nodes, looking for a simple counter.
2199
2171
PHINode *BestPhi = nullptr ;
@@ -2226,13 +2198,15 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2226
2198
// We explicitly allow unknown phis as long as they are already used by
2227
2199
// the loop test. In this case we assume that performing LFTR could not
2228
2200
// increase the number of undef users.
2229
- if (ICmpInst *Cond = getLoopTest (L)) {
2230
- if (Phi != getLoopPhiForCounter (Cond->getOperand (0 ), L) &&
2231
- Phi != getLoopPhiForCounter (Cond->getOperand (1 ), L)) {
2232
- continue ;
2233
- }
2234
- }
2201
+ // TODO: Generalize this to allow *any* loop exit which is known to
2202
+ // execute on each iteration
2203
+ if (L->getExitingBlock ())
2204
+ if (ICmpInst *Cond = getLoopTest (L, ExitingBB))
2205
+ if (Phi != getLoopPhiForCounter (Cond->getOperand (0 ), L) &&
2206
+ Phi != getLoopPhiForCounter (Cond->getOperand (1 ), L))
2207
+ continue ;
2235
2208
}
2209
+
2236
2210
const SCEV *Init = AR->getStart ();
2237
2211
2238
2212
if (BestPhi && !AlmostDeadIV (BestPhi, LatchBlock, Cond)) {
@@ -2261,7 +2235,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount,
2261
2235
// / Insert an IR expression which computes the value held by the IV IndVar
2262
2236
// / (which must be a loop counter w/unit stride) after the backedge of loop L
2263
2237
// / is taken IVCount times.
2264
- static Value *genLoopLimit (PHINode *IndVar, const SCEV *IVCount, Loop *L,
2238
+ static Value *genLoopLimit (PHINode *IndVar, BasicBlock *ExitingBB,
2239
+ const SCEV *IVCount, Loop *L,
2265
2240
SCEVExpander &Rewriter, ScalarEvolution *SE) {
2266
2241
assert (isLoopCounter (IndVar, L, SE));
2267
2242
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV (IndVar));
@@ -2284,13 +2259,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2284
2259
// Expand the code for the iteration count.
2285
2260
assert (SE->isLoopInvariant (IVOffset, L) &&
2286
2261
" Computed iteration count is not loop invariant!" );
2287
- BranchInst *BI = cast<BranchInst>(L-> getExitingBlock () ->getTerminator ());
2262
+ BranchInst *BI = cast<BranchInst>(ExitingBB ->getTerminator ());
2288
2263
Value *GEPOffset = Rewriter.expandCodeFor (IVOffset, OfsTy, BI);
2289
2264
2290
2265
Value *GEPBase = IndVar->getIncomingValueForBlock (L->getLoopPreheader ());
2291
2266
assert (AR->getStart () == SE->getSCEV (GEPBase) && " bad loop counter" );
2292
2267
// We could handle pointer IVs other than i8*, but we need to compensate for
2293
- // gep index scaling. See canExpandBackedgeTakenCount comments.
2268
+ // gep index scaling.
2294
2269
assert (SE->getSizeOfExpr (IntegerType::getInt64Ty (IndVar->getContext ()),
2295
2270
cast<PointerType>(GEPBase->getType ())
2296
2271
->getElementType ())->isOne () &&
@@ -2328,7 +2303,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2328
2303
IVLimit = SE->getAddExpr (IVInit, IVCount);
2329
2304
}
2330
2305
// Expand the code for the iteration count.
2331
- BranchInst *BI = cast<BranchInst>(L-> getExitingBlock () ->getTerminator ());
2306
+ BranchInst *BI = cast<BranchInst>(ExitingBB ->getTerminator ());
2332
2307
IRBuilder<> Builder (BI);
2333
2308
assert (SE->isLoopInvariant (IVLimit, L) &&
2334
2309
" Computed iteration count is not loop invariant!" );
@@ -2347,10 +2322,9 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
2347
2322
// / determine a loop-invariant trip count of the loop, which is actually a much
2348
2323
// / broader range than just linear tests.
2349
2324
bool IndVarSimplify::
2350
- linearFunctionTestReplace (Loop *L, const SCEV *BackedgeTakenCount,
2325
+ linearFunctionTestReplace (Loop *L, BasicBlock *ExitingBB,
2326
+ const SCEV *BackedgeTakenCount,
2351
2327
PHINode *IndVar, SCEVExpander &Rewriter) {
2352
- assert (canExpandBackedgeTakenCount (L, SE, Rewriter) && " precondition" );
2353
-
2354
2328
// Initialize CmpIndVar and IVCount to their preincremented values.
2355
2329
Value *CmpIndVar = IndVar;
2356
2330
const SCEV *IVCount = BackedgeTakenCount;
@@ -2360,7 +2334,7 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2360
2334
// If the exiting block is the same as the backedge block, we prefer to
2361
2335
// compare against the post-incremented value, otherwise we must compare
2362
2336
// against the preincremented value.
2363
- if (L-> getExitingBlock () == L->getLoopLatch ()) {
2337
+ if (ExitingBB == L->getLoopLatch ()) {
2364
2338
// Add one to the "backedge-taken" count to get the trip count.
2365
2339
// This addition may overflow, which is valid as long as the comparison is
2366
2340
// truncated to BackedgeTakenCount->getType().
@@ -2369,7 +2343,7 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2369
2343
// The BackedgeTaken expression contains the number of times that the
2370
2344
// backedge branches to the loop header. This is one less than the
2371
2345
// number of times the loop executes, so use the incremented indvar.
2372
- CmpIndVar = IndVar->getIncomingValueForBlock (L-> getExitingBlock () );
2346
+ CmpIndVar = IndVar->getIncomingValueForBlock (ExitingBB );
2373
2347
}
2374
2348
2375
2349
// It may be necessary to drop nowrap flags on the incrementing instruction
@@ -2393,13 +2367,13 @@ linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
2393
2367
BO->setHasNoSignedWrap (AR->hasNoSignedWrap ());
2394
2368
}
2395
2369
2396
- Value *ExitCnt = genLoopLimit (IndVar, IVCount, L, Rewriter, SE);
2370
+ Value *ExitCnt = genLoopLimit (IndVar, ExitingBB, IVCount, L, Rewriter, SE);
2397
2371
assert (ExitCnt->getType ()->isPointerTy () ==
2398
2372
IndVar->getType ()->isPointerTy () &&
2399
2373
" genLoopLimit missed a cast" );
2400
2374
2401
2375
// Insert a new icmp_ne or icmp_eq instruction before the branch.
2402
- BranchInst *BI = cast<BranchInst>(L-> getExitingBlock () ->getTerminator ());
2376
+ BranchInst *BI = cast<BranchInst>(ExitingBB ->getTerminator ());
2403
2377
ICmpInst::Predicate P;
2404
2378
if (L->contains (BI->getSuccessor (0 )))
2405
2379
P = ICmpInst::ICMP_NE;
@@ -2645,22 +2619,60 @@ bool IndVarSimplify::run(Loop *L) {
2645
2619
NumElimIV += Rewriter.replaceCongruentIVs (L, DT, DeadInsts);
2646
2620
2647
2621
// If we have a trip count expression, rewrite the loop's exit condition
2648
- // using it. We can currently only handle loops with a single exit.
2649
- if (!DisableLFTR && canExpandBackedgeTakenCount (L, SE, Rewriter) &&
2650
- needsLFTR (L)) {
2651
- PHINode *IndVar = FindLoopCounter (L, BackedgeTakenCount, SE);
2652
- if (IndVar) {
2622
+ // using it.
2623
+ if (!DisableLFTR) {
2624
+ // For the moment, we only do LFTR for single exit loops. The code is
2625
+ // structured as it is in the expectation of generalization to multi-exit
2626
+ // loops in the near future. See D62625 for context.
2627
+ SmallVector<BasicBlock*, 16 > ExitingBlocks;
2628
+ if (auto *ExitingBB = L->getExitingBlock ())
2629
+ ExitingBlocks.push_back (ExitingBB);
2630
+ for (BasicBlock *ExitingBB : ExitingBlocks) {
2631
+ // Can't rewrite non-branch yet.
2632
+ if (!isa<BranchInst>(ExitingBB->getTerminator ()))
2633
+ continue ;
2634
+
2635
+ if (!needsLFTR (L, ExitingBB))
2636
+ continue ;
2637
+
2638
+ // Note: This block of code is here strictly to separate a change into
2639
+ // two parts: one NFC, one not. What's happening here is that SCEV is
2640
+ // returning a more expensive expression for the BackedgeTakenCount for
2641
+ // the loop after widening in rare circumstances. In review, we decided
2642
+ // to accept that small difference - since it has minimal test suite
2643
+ // impact - but for ease of attribution, the functional diff will be its
2644
+ // own change.
2645
+ const SCEV *BETakenCount = L->getExitingBlock () ?
2646
+ BackedgeTakenCount : SE->getExitCount (L, ExitingBB);
2647
+ if (isa<SCEVCouldNotCompute>(BETakenCount))
2648
+ continue ;
2649
+
2650
+ // Better to fold to true (TODO: do so!)
2651
+ if (BETakenCount->isZero ())
2652
+ continue ;
2653
+
2654
+ PHINode *IndVar = FindLoopCounter (L, ExitingBB, BETakenCount, SE);
2655
+ if (!IndVar)
2656
+ continue ;
2657
+
2658
+ // Avoid high cost expansions. Note: This heuristic is questionable in
2659
+ // that our definition of "high cost" is not exactly principled.
2660
+ if (Rewriter.isHighCostExpansion (BETakenCount, L))
2661
+ continue ;
2662
+
2653
2663
// Check preconditions for proper SCEVExpander operation. SCEV does not
2654
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
2655
- // pass that uses the SCEVExpander must do it. This does not work well for
2656
- // loop passes because SCEVExpander makes assumptions about all loops,
2657
- // while LoopPassManager only forces the current loop to be simplified.
2664
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead
2665
+ // any pass that uses the SCEVExpander must do it. This does not work
2666
+ // well for loop passes because SCEVExpander makes assumptions about
2667
+ // all loops, while LoopPassManager only forces the current loop to be
2668
+ // simplified.
2658
2669
//
2659
2670
// FIXME: SCEV expansion has no way to bail out, so the caller must
2660
2671
// explicitly check any assumptions made by SCEV. Brittle.
2661
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount );
2672
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BETakenCount );
2662
2673
if (!AR || AR->getLoop ()->getLoopPreheader ())
2663
- Changed |= linearFunctionTestReplace (L, BackedgeTakenCount, IndVar,
2674
+ Changed |= linearFunctionTestReplace (L, ExitingBB,
2675
+ BETakenCount, IndVar,
2664
2676
Rewriter);
2665
2677
}
2666
2678
}
0 commit comments