Skip to content

Commit 991e7c6

Browse files
committed
[LV] Skip sentinel value for FindLastIV reductions when start value is provably less than IV start.
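Not all FindLastIV reductions require a sentinel value. If it can be proven that the start value of the reduction is strictly less than the start value of the increasing induction variable (currently only checked when both are constants), the sentinel is unnecessary: the reduction phi keeps its original start value and the final compare-and-select against the sentinel is omitted. This patch simplifies the IR generated for FindLastIV reductions in such cases.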
1 parent 6c86b7d commit 991e7c6

File tree

6 files changed

+77
-53
lines changed

6 files changed

+77
-53
lines changed

llvm/include/llvm/Analysis/IVDescriptors.h

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,12 @@ class RecurrenceDescriptor {
8282
RecurKind K, FastMathFlags FMF, Instruction *ExactFP,
8383
Type *RT, bool Signed, bool Ordered,
8484
SmallPtrSetImpl<Instruction *> &CI,
85-
unsigned MinWidthCastToRecurTy)
85+
unsigned MinWidthCastToRecurTy, Value *Sentinel)
8686
: IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit),
8787
Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT),
8888
IsSigned(Signed), IsOrdered(Ordered),
89-
MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) {
89+
MinWidthCastToRecurrenceType(MinWidthCastToRecurTy),
90+
SentinelValue(Sentinel) {
9091
CastInsts.insert_range(CI);
9192
}
9293

@@ -97,9 +98,10 @@ class RecurrenceDescriptor {
9798
: IsRecurrence(IsRecur), PatternLastInst(I),
9899
RecKind(RecurKind::None), ExactFPMathInst(ExactFP) {}
99100

100-
InstDesc(Instruction *I, RecurKind K, Instruction *ExactFP = nullptr)
101+
InstDesc(Instruction *I, RecurKind K, Value *Sentinel = nullptr,
102+
Instruction *ExactFP = nullptr)
101103
: IsRecurrence(true), PatternLastInst(I), RecKind(K),
102-
ExactFPMathInst(ExactFP) {}
104+
ExactFPMathInst(ExactFP), SentinelValue(Sentinel) {}
103105

104106
bool isRecurrence() const { return IsRecurrence; }
105107

@@ -111,6 +113,8 @@ class RecurrenceDescriptor {
111113

112114
Instruction *getPatternInst() const { return PatternLastInst; }
113115

116+
Value *getSentinelValue() const { return SentinelValue; }
117+
114118
private:
115119
// Is this instruction a recurrence candidate.
116120
bool IsRecurrence;
@@ -121,6 +125,9 @@ class RecurrenceDescriptor {
121125
RecurKind RecKind;
122126
// Recurrence does not allow floating-point reassociation.
123127
Instruction *ExactFPMathInst;
128+
// The value is used to replace the start value if required by the
129+
// recurrence.
130+
Value *SentinelValue = nullptr;
124131
};
125132

126133
/// Returns a struct describing if the instruction 'I' can be a recurrence
@@ -271,9 +278,7 @@ class RecurrenceDescriptor {
271278
/// value.
272279
Value *getSentinelValue() const {
273280
assert(isFindLastIVRecurrenceKind(Kind) && "Unexpected recurrence kind");
274-
Type *Ty = StartValue->getType();
275-
return ConstantInt::get(Ty,
276-
APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
281+
return SentinelValue;
277282
}
278283

279284
/// Returns a reference to the instructions used for type-promoting the
@@ -332,6 +337,8 @@ class RecurrenceDescriptor {
332337
SmallPtrSet<Instruction *, 8> CastInsts;
333338
// The minimum width used by the recurrence.
334339
unsigned MinWidthCastToRecurrenceType;
340+
/// The value used to represent the starting value.
341+
Value *SentinelValue = nullptr;
335342
};
336343

337344
/// A struct for saving information about induction variables.

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ bool RecurrenceDescriptor::AddReductionVar(
288288
// The first instruction in the use-def chain of the Phi node that requires
289289
// exact floating point operations.
290290
Instruction *ExactFPMathInst = nullptr;
291+
// Record the sentinel value on demand.
292+
Value *SentinelValue = nullptr;
291293

292294
// A value in the reduction can be used:
293295
// - By the reduction:
@@ -376,6 +378,10 @@ bool RecurrenceDescriptor::AddReductionVar(
376378
ExactFPMathInst = ExactFPMathInst == nullptr
377379
? ReduxDesc.getExactFPMathInst()
378380
: ExactFPMathInst;
381+
if (auto *Sentinel = ReduxDesc.getSentinelValue()) {
382+
assert(!SentinelValue && "Sentinel value can only be assigned once");
383+
SentinelValue = Sentinel;
384+
}
379385
if (!ReduxDesc.isRecurrence())
380386
return false;
381387
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -596,7 +602,8 @@ bool RecurrenceDescriptor::AddReductionVar(
596602
// Save the description of this reduction variable.
597603
RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind,
598604
FMF, ExactFPMathInst, RecurrenceType, IsSigned,
599-
IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
605+
IsOrdered, CastInsts, MinWidthCastToRecurrenceType,
606+
SentinelValue);
600607
RedDes = RD;
601608

602609
return true;
@@ -700,18 +707,18 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
700707
m_Value(NonRdxPhi)))))
701708
return InstDesc(false, I);
702709

703-
auto IsIncreasingLoopInduction = [&](Value *V) {
710+
auto IsIncreasingLoopInduction = [&](Value *V) -> std::pair<bool, Value*> {
704711
Type *Ty = V->getType();
705712
if (!SE.isSCEVable(Ty))
706-
return false;
713+
return {false, nullptr};
707714

708715
auto *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(V));
709716
if (!AR || AR->getLoop() != TheLoop)
710-
return false;
717+
return {false, nullptr};
711718

712719
const SCEV *Step = AR->getStepRecurrence(SE);
713720
if (!SE.isKnownPositive(Step))
714-
return false;
721+
return {false, nullptr};
715722

716723
const ConstantRange IVRange = SE.getSignedRange(AR);
717724
unsigned NumBits = Ty->getIntegerBitWidth();
@@ -730,17 +737,32 @@ RecurrenceDescriptor::isFindLastIVPattern(Loop *TheLoop, PHINode *OrigPhi,
730737
<< IVRange << "\n");
731738
// Ensure the induction variable does not wrap around by verifying that its
732739
// range is fully contained within the valid range.
733-
return ValidRange.contains(IVRange);
740+
if (!ValidRange.contains(IVRange))
741+
return {false, nullptr};
742+
743+
// No sentinel is needed if it can be proven that the start value of
744+
// reduction is strictly less than the start value of increasing induction
745+
// variable.
746+
if (auto *ConstIVStart = dyn_cast<SCEVConstant>(AR->getStart())) {
747+
Value *RdxStart =
748+
OrigPhi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
749+
if (auto *ConstRdxStart = dyn_cast<ConstantInt>(RdxStart))
750+
if (ConstRdxStart->getValue().slt(ConstIVStart->getAPInt()))
751+
return {true, nullptr};
752+
}
753+
754+
return {true, ConstantInt::get(Ty, Sentinel)};
734755
};
735756

736757
// We are looking for selects of the form:
737758
// select(cmp(), phi, increasing_loop_induction) or
738759
// select(cmp(), increasing_loop_induction, phi)
739760
// TODO: Support for monotonically decreasing induction variable
740-
if (!IsIncreasingLoopInduction(NonRdxPhi))
761+
auto [IsRecurrence, Sentinel] = IsIncreasingLoopInduction(NonRdxPhi);
762+
if (!IsRecurrence)
741763
return InstDesc(false, I);
742764

743-
return InstDesc(I, RecurKind::FindLastIV);
765+
return InstDesc(I, RecurKind::FindLastIV, Sentinel);
744766
}
745767

746768
RecurrenceDescriptor::InstDesc

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,15 +1247,17 @@ Value *llvm::createFindLastIVReduction(IRBuilderBase &Builder, Value *Src,
12471247
assert(RecurrenceDescriptor::isFindLastIVRecurrenceKind(
12481248
Desc.getRecurrenceKind()) &&
12491249
"Unexpected reduction kind");
1250-
Value *Sentinel = Desc.getSentinelValue();
1251-
Value *MaxRdx = Src->getType()->isVectorTy()
1250+
Value *Result = Src->getType()->isVectorTy()
12521251
? Builder.CreateIntMaxReduce(Src, true)
12531252
: Src;
12541253
// Correct the final reduction result back to the start value if the maximum
12551254
// reduction is sentinel value.
1256-
Value *Cmp =
1257-
Builder.CreateCmp(CmpInst::ICMP_NE, MaxRdx, Sentinel, "rdx.select.cmp");
1258-
return Builder.CreateSelect(Cmp, MaxRdx, Start, "rdx.select");
1255+
if (Value *Sentinel = Desc.getSentinelValue()) {
1256+
Value *Cmp =
1257+
Builder.CreateCmp(CmpInst::ICMP_NE, Result, Sentinel, "rdx.select.cmp");
1258+
Result = Builder.CreateSelect(Cmp, Result, Start, "rdx.select");
1259+
}
1260+
return Result;
12591261
}
12601262

12611263
Value *llvm::getReductionIdentity(Intrinsic::ID RdxID, Type *Ty,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7568,7 +7568,8 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
75687568
"start value");
75697569
MainResumeValue = Cmp->getOperand(0);
75707570
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(
7571-
RdxDesc.getRecurrenceKind())) {
7571+
RdxDesc.getRecurrenceKind()) &&
7572+
RdxDesc.getSentinelValue()) {
75727573
using namespace llvm::PatternMatch;
75737574
Value *Cmp, *OrigResumeV, *CmpOp;
75747575
bool IsExpectedPattern =
@@ -9625,7 +9626,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
96259626
// Adjust the start value for FindLastIV recurrences to use the sentinel
96269627
// value after generating the ResumePhi recipe, which uses the original
96279628
// start value.
9628-
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
9629+
if (auto *Sentinel = RdxDesc.getSentinelValue())
9630+
PhiR->setOperand(0, Plan->getOrAddLiveIn(Sentinel));
96299631
}
96309632
}
96319633
for (VPRecipeBase *R : ToDelete)
@@ -10114,7 +10116,8 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
1011410116
IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt());
1011510117
ResumeV =
1011610118
Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue());
10117-
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
10119+
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
10120+
RdxDesc.getSentinelValue()) {
1011810121
ToFrozen[RdxDesc.getRecurrenceStartValue()] =
1011910122
cast<PHINode>(ResumeV)->getIncomingValueForBlock(
1012010123
EPI.MainLoopIterationCountCheck);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3848,15 +3848,11 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
38483848
StartV = Iden = State.get(StartVPV);
38493849
}
38503850
} else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
3851-
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
3852-
// phi or the resume value from the main vector loop when vectorizing the
3853-
// epilogue loop. In the exit block, ComputeReductionResult will generate
3854-
// checks to verify if the reduction result is the sentinel value. If the
3855-
// result is the sentinel value, it will be corrected back to the start
3856-
// value.
3857-
// TODO: The sentinel value is not always necessary. When the start value is
3858-
// a constant, and smaller than the start value of the induction variable,
3859-
// the start value can be directly used to initialize the reduction phi.
3851+
// FindLastIV may use a sentinel value to initialize the reduction phi or
3852+
// the resume value from the main vector loop when vectorizing the epilogue
3853+
// loop. In the exit block, ComputeFindLastIVResult will generate checks to
3854+
// verify if the reduction result is the sentinel value. If the result is
3855+
// the sentinel value, it will be corrected back to the start value.
38603856
Iden = StartV;
38613857
if (!ScalarPHI) {
38623858
IRBuilderBase::InsertPointGuard IPBuilder(Builder);

llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
441441
; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]]
442442
; CHECK-VF4IC1: [[VECTOR_BODY]]:
443443
; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
444-
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
444+
; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
445445
; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
446446
; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
447447
; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
@@ -454,12 +454,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
454454
; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
455455
; CHECK-VF4IC1: [[MIDDLE_BLOCK]]:
456456
; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
457-
; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
458-
; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 -1
459457
; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
460458
; CHECK-VF4IC1: [[SCALAR_PH]]:
461459
; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
462-
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
460+
; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
463461
; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]]
464462
; CHECK-VF4IC1: [[FOR_BODY]]:
465463
; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
@@ -473,7 +471,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
473471
; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
474472
; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
475473
; CHECK-VF4IC1: [[EXIT]]:
476-
; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
474+
; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ]
477475
; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
478476
;
479477
; CHECK-VF4IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub(
@@ -484,10 +482,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
484482
; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]]
485483
; CHECK-VF4IC4: [[VECTOR_BODY]]:
486484
; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
487-
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
488-
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
489-
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
490-
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
485+
; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
486+
; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
487+
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
488+
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
491489
; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
492490
; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
493491
; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
@@ -518,12 +516,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
518516
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
519517
; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
520518
; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
521-
; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
522-
; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 -1
523519
; CHECK-VF4IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
524520
; CHECK-VF4IC4: [[SCALAR_PH]]:
525521
; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
526-
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
522+
; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
527523
; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
528524
; CHECK-VF4IC4: [[FOR_BODY]]:
529525
; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
@@ -537,7 +533,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
537533
; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
538534
; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
539535
; CHECK-VF4IC4: [[EXIT]]:
540-
; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
536+
; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[TMP15]], %[[MIDDLE_BLOCK]] ]
541537
; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
542538
;
543539
; CHECK-VF1IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub(
@@ -548,10 +544,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
548544
; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]]
549545
; CHECK-VF1IC4: [[VECTOR_BODY]]:
550546
; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
551-
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
552-
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
553-
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
554-
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
547+
; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
548+
; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
549+
; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
550+
; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
555551
; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
556552
; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
557553
; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
@@ -582,12 +578,10 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
582578
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
583579
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
584580
; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
585-
; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
586-
; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 -1
587581
; CHECK-VF1IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
588582
; CHECK-VF1IC4: [[SCALAR_PH]]:
589583
; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
590-
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
584+
; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX5]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
591585
; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]]
592586
; CHECK-VF1IC4: [[FOR_BODY]]:
593587
; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
@@ -601,7 +595,7 @@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) {
601595
; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
602596
; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
603597
; CHECK-VF1IC4: [[EXIT]]:
604-
; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
598+
; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_MINMAX5]], %[[MIDDLE_BLOCK]] ]
605599
; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]]
606600
;
607601
entry:

0 commit comments

Comments
 (0)