38
38
#include " llvm/Transforms/Utils/Cloning.h"
39
39
#include " llvm/Transforms/Utils/LoopSimplify.h"
40
40
#include " llvm/Transforms/Utils/LoopUtils.h"
41
+ #include " llvm/Transforms/Utils/ScalarEvolutionExpander.h"
41
42
#include " llvm/Transforms/Utils/ValueMapper.h"
42
43
#include < algorithm>
43
44
#include < cassert>
@@ -330,11 +331,7 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
330
331
331
332
bool llvm::canPeelLastIteration (const Loop &L, ScalarEvolution &SE) {
332
333
const SCEV *BTC = SE.getBackedgeTakenCount (&L);
333
- // The loop must execute at least 2 iterations to guarantee that peeled
334
- // iteration executes.
335
- // TODO: Add checks during codegen.
336
- if (isa<SCEVCouldNotCompute>(BTC) ||
337
- !SE.isKnownPredicate (CmpInst::ICMP_UGT, BTC, SE.getZero (BTC->getType ())))
334
+ if (isa<SCEVCouldNotCompute>(BTC))
338
335
return false ;
339
336
340
337
// Check if the exit condition of the loop can be adjusted by the peeling
@@ -354,6 +351,7 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
354
351
m_BasicBlock (Succ1), m_BasicBlock (Succ2))) &&
355
352
((Pred == CmpInst::ICMP_EQ && Succ2 == L.getHeader ()) ||
356
353
(Pred == CmpInst::ICMP_NE && Succ1 == L.getHeader ())) &&
354
+ Bound->getType ()->isIntegerTy () &&
357
355
SE.isLoopInvariant (SE.getSCEV (Bound), &L) &&
358
356
match (SE.getSCEV (Inc),
359
357
m_scev_AffineAddRec (m_SCEV (), m_scev_One (), m_SpecificLoop (&L)));
@@ -364,12 +362,18 @@ bool llvm::canPeelLastIteration(const Loop &L, ScalarEvolution &SE) {
364
362
/// is known at the second-to-last.
365
363
static bool shouldPeelLastIteration (Loop &L, CmpPredicate Pred,
366
364
const SCEVAddRecExpr *LeftAR,
367
- const SCEV *RightSCEV,
368
- ScalarEvolution &SE ) {
365
+ const SCEV *RightSCEV, ScalarEvolution &SE,
366
+ const TargetTransformInfo &TTI ) {
369
367
if (!canPeelLastIteration (L, SE))
370
368
return false ;
371
369
372
370
const SCEV *BTC = SE.getBackedgeTakenCount (&L);
371
+ SCEVExpander Expander (SE, L.getHeader ()->getDataLayout (), " loop-peel" );
372
+ if (!SE.isKnownNonZero (BTC) &&
373
+ Expander.isHighCostExpansion (BTC, &L, SCEVCheapExpansionBudget, &TTI,
374
+ L.getLoopPredecessor ()->getTerminator ()))
375
+ return false ;
376
+
373
377
const SCEV *ValAtLastIter = LeftAR->evaluateAtIteration (BTC, SE);
374
378
const SCEV *ValAtSecondToLastIter = LeftAR->evaluateAtIteration (
375
379
SE.getMinusSCEV (BTC, SE.getOne (BTC->getType ())), SE);
@@ -391,7 +395,8 @@ static bool shouldPeelLastIteration(Loop &L, CmpPredicate Pred,
391
395
// ..
392
396
// }
393
397
static std::pair<unsigned , unsigned >
394
- countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
398
+ countToEliminateCompares (Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE,
399
+ const TargetTransformInfo &TTI) {
395
400
assert (L.isLoopSimplifyForm () && " Loop needs to be in loop simplify form" );
396
401
unsigned DesiredPeelCount = 0 ;
397
402
unsigned DesiredPeelCountLast = 0 ;
@@ -479,7 +484,7 @@ countToEliminateCompares(Loop &L, unsigned MaxPeelCount, ScalarEvolution &SE) {
479
484
const SCEV *Step = LeftAR->getStepRecurrence (SE);
480
485
if (!PeelWhilePredicateIsKnown (NewPeelCount, IterVal, RightSCEV, Step,
481
486
Pred)) {
482
- if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE))
487
+ if (shouldPeelLastIteration (L, Pred, LeftAR, RightSCEV, SE, TTI ))
483
488
DesiredPeelCountLast = 1 ;
484
489
return ;
485
490
}
@@ -593,8 +598,8 @@ static bool violatesLegacyMultiExitLoopCheck(Loop *L) {
593
598
void llvm::computePeelCount (Loop *L, unsigned LoopSize,
594
599
TargetTransformInfo::PeelingPreferences &PP,
595
600
unsigned TripCount, DominatorTree &DT,
596
- ScalarEvolution &SE, AssumptionCache *AC ,
597
- unsigned Threshold) {
601
+ ScalarEvolution &SE, const TargetTransformInfo &TTI ,
602
+ AssumptionCache *AC, unsigned Threshold) {
598
603
assert (LoopSize > 0 && " Zero loop size is not allowed!" );
599
604
// Save the PP.PeelCount value set by the target in
600
605
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -656,7 +661,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
656
661
}
657
662
658
663
const auto &[CountToEliminateCmps, CountToEliminateCmpsLast] =
659
- countToEliminateCompares (*L, MaxPeelCount, SE);
664
+ countToEliminateCompares (*L, MaxPeelCount, SE, TTI );
660
665
DesiredPeelCount = std::max (DesiredPeelCount, CountToEliminateCmps);
661
666
662
667
if (DesiredPeelCount == 0 )
@@ -822,7 +827,7 @@ static void initBranchWeights(DenseMap<Instruction *, WeightInfo> &WeightInfos,
822
827
/// instructions in the last peeled-off iteration.
823
828
static void cloneLoopBlocks (
824
829
Loop *L, unsigned IterNumber, bool PeelLast, BasicBlock *InsertTop,
825
- BasicBlock *InsertBot,
830
+ BasicBlock *InsertBot, BasicBlock *OrigPreHeader,
826
831
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
827
832
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
828
833
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
@@ -914,12 +919,22 @@ static void cloneLoopBlocks(
914
919
// loop iteration. Since this copy is no longer part of the loop, we
915
920
// resolve this statically:
916
921
if (PeelLast) {
917
- // For the last iteration, we use the value from the latch of the original
918
- // loop directly.
922
+ // For the last iteration, we introduce new phis for each header phi in
923
+ // InsertTop, using the incoming value from the preheader for the original
924
+ // preheader (when skipping the main loop) and the incoming value from the
925
+ // latch for the latch (when continuing from the main loop).
926
+ IRBuilder<> B (InsertTop, InsertTop->getFirstNonPHIIt ());
919
927
for (BasicBlock::iterator I = Header->begin (); isa<PHINode>(I); ++I) {
920
928
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
921
- VMap[&*I] = NewPHI->getIncomingValueForBlock (Latch );
929
+ PHINode *PN = B. CreatePHI ( NewPHI->getType (), 2 );
922
930
NewPHI->eraseFromParent ();
931
+ if (OrigPreHeader)
932
+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (PreHeader),
933
+ OrigPreHeader);
934
+
935
+ PN->addIncoming (cast<PHINode>(&*I)->getIncomingValueForBlock (Latch),
936
+ Latch);
937
+ VMap[&*I] = PN;
923
938
}
924
939
} else {
925
940
// For the first iteration, we use the value from the preheader directly.
@@ -1053,7 +1068,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1053
1068
// Set up all the necessary basic blocks.
1054
1069
BasicBlock *InsertTop;
1055
1070
BasicBlock *InsertBot;
1056
- BasicBlock *NewPreHeader;
1071
+ BasicBlock *NewPreHeader = nullptr ;
1057
1072
DenseMap<Instruction *, Value *> ExitValues;
1058
1073
if (PeelLast) {
1059
1074
// It is convenient to split the single exit block from the latch the
@@ -1084,11 +1099,34 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1084
1099
for (PHINode &P : Exit->phis ())
1085
1100
ExitValues[&P] = P.getIncomingValueForBlock (Latch);
1086
1101
1102
+ const SCEV *BTC = SE->getBackedgeTakenCount (L);
1103
+
1087
1104
InsertTop = SplitEdge (Latch, Exit, &DT, LI);
1088
1105
InsertBot = SplitBlock (InsertTop, InsertTop->getTerminator (), &DT, LI);
1089
1106
1090
1107
InsertTop->setName (Exit->getName () + " .peel.begin" );
1091
1108
InsertBot->setName (Exit->getName () + " .peel.next" );
1109
+ NewPreHeader = nullptr ;
1110
+
1111
+ // If the original loop may only execute a single iteration we need to
1112
+ // insert a trip count check and skip the original loop with the last
1113
+ // iteration peeled off if necessary.
1114
+ if (!SE->isKnownNonZero (BTC)) {
1115
+ NewPreHeader = SplitEdge (PreHeader, Header, &DT, LI);
1116
+ SCEVExpander Expander (*SE, Latch->getDataLayout (), " loop-peel" );
1117
+
1118
+ BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator ());
1119
+ Value *BTCValue =
1120
+ Expander.expandCodeFor (BTC, BTC->getType (), PreHeaderBR);
1121
+ IRBuilder<> B (PreHeaderBR);
1122
+ Value *Cond =
1123
+ B.CreateICmpNE (BTCValue, ConstantInt::get (BTCValue->getType (), 0 ));
1124
+ B.CreateCondBr (Cond, NewPreHeader, InsertTop);
1125
+ PreHeaderBR->eraseFromParent ();
1126
+
1127
+ // PreHeader now dominates InsertTop.
1128
+ DT.changeImmediateDominator (InsertTop, PreHeader);
1129
+ }
1092
1130
} else {
1093
1131
// It is convenient to split the preheader into 3 parts - two blocks to
1094
1132
// anchor the peeled copy of the loop body, and a new preheader for the
@@ -1162,8 +1200,9 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1162
1200
for (unsigned Iter = 0 ; Iter < PeelCount; ++Iter) {
1163
1201
SmallVector<BasicBlock *, 8 > NewBlocks;
1164
1202
1165
- cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot, ExitEdges,
1166
- NewBlocks, LoopBlocks, VMap, LVMap, &DT, LI,
1203
+ cloneLoopBlocks (L, Iter, PeelLast, InsertTop, InsertBot,
1204
+ NewPreHeader ? PreHeader : nullptr , ExitEdges, NewBlocks,
1205
+ LoopBlocks, VMap, LVMap, &DT, LI,
1167
1206
LoopLocalNoAliasDeclScopes, *SE);
1168
1207
1169
1208
// Remap to use values from the current iteration instead of the
@@ -1216,9 +1255,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1216
1255
1217
1256
if (PeelLast) {
1218
1257
// Now adjust users of the original exit values by replacing them with the
1219
- // exit value from the peeled iteration.
1220
- for (const auto &[P, E] : ExitValues)
1258
+ // exit value from the peeled iteration and remove them.
1259
+ for (const auto &[P, E] : ExitValues) {
1221
1260
P->replaceAllUsesWith (isa<Constant>(E) ? E : &*VMap.lookup (E));
1261
+ P->eraseFromParent ();
1262
+ }
1222
1263
formLCSSA (*L, DT, LI, SE);
1223
1264
} else {
1224
1265
// Now adjust the phi nodes in the loop header to get their initial values
0 commit comments