Skip to content

Commit 3ea4b69

Browse files
committed
Use a better exit condition that is derived from the original loop
The new exit condition will now be `EVLIV > VF * BTC`, where VF is the vectorization factor and BTC is the backedge-taken count.
1 parent 1bb3aea commit 3ea4b69

File tree

2 files changed

+77
-46
lines changed

2 files changed

+77
-46
lines changed

llvm/lib/CodeGen/EVLIndVarSimplify.cpp

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/Analysis/LoopInfo.h"
1818
#include "llvm/Analysis/LoopPass.h"
1919
#include "llvm/Analysis/ScalarEvolution.h"
20+
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
2021
#include "llvm/Analysis/ValueTracking.h"
2122
#include "llvm/IR/IRBuilder.h"
2223
#include "llvm/IR/PatternMatch.h"
@@ -28,6 +29,7 @@
2829
#include "llvm/Support/raw_ostream.h"
2930
#include "llvm/Transforms/Scalar/LoopPassManager.h"
3031
#include "llvm/Transforms/Utils/Local.h"
32+
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
3133

3234
#define DEBUG_TYPE "evl-iv-simplify"
3335

@@ -134,8 +136,6 @@ bool EVLIndVarSimplifyImpl::run(Loop &L) {
134136
}
135137
Value *CanonicalIVInit = &Bounds->getInitialIVValue();
136138
Value *CanonicalIVFinal = &Bounds->getFinalIVValue();
137-
const SCEV *CanonicalIVInitV = SE.getSCEV(CanonicalIVInit);
138-
const SCEV *CanonicalIVFinalV = SE.getSCEV(CanonicalIVFinal);
139139

140140
const SCEV *StepV = IVD.getStep();
141141
uint32_t VF = getVFFromIndVar(StepV, *L.getHeader()->getParent());
@@ -152,7 +152,7 @@ bool EVLIndVarSimplifyImpl::run(Loop &L) {
152152
BasicBlock *BB = IndVar->getParent();
153153

154154
Value *EVLIndVar = nullptr;
155-
Value *RemTC = nullptr, *TC = nullptr;
155+
Value *RemTC = nullptr;
156156
auto IntrinsicMatch = m_Intrinsic<Intrinsic::experimental_get_vector_length>(
157157
m_Value(RemTC), m_SpecificInt(VF),
158158
/*Scalable=*/m_SpecificInt(1));
@@ -192,53 +192,42 @@ bool EVLIndVarSimplifyImpl::run(Loop &L) {
192192
LLVM_DEBUG(dbgs() << "Found candidate PN of EVL-based IndVar: " << PN
193193
<< "\n");
194194

195-
// Check 3: Pattern match to find the EVL-based index and total trip count
196-
// (TC).
195+
// Check 3: Pattern match to find the EVL-based index.
197196
if (match(RecValue,
198197
m_c_Add(m_ZExtOrSelf(IntrinsicMatch), m_Specific(&PN))) &&
199-
match(RemTC, m_Sub(m_Value(TC), m_Specific(&PN)))) {
198+
match(RemTC, m_Sub(m_Value(), m_Specific(&PN)))) {
200199
EVLIndVar = RecValue;
201200
break;
202201
}
203202
}
204203

205-
if (!EVLIndVar || !TC)
204+
if (!EVLIndVar)
206205
return false;
207206

208-
// Make sure TC is related to the original trip count of the canonical IV.
209-
// Specifically, if the canonical trip count is derived from TC.
210-
const SCEV *TCV = SE.getSCEV(TC);
211-
bool MatchTC = false;
212-
if (const auto *ConstTCV = dyn_cast<SCEVConstant>(TCV)) {
213-
// If TC is a constant and vscale is also a constant, then the canonical
214-
// trip count will be constant. Canonical trip count * Step equals to the
215-
// round up of TC.
216-
if (const auto *ConstStep = dyn_cast<SCEVConstant>(StepV))
217-
if (unsigned CanonicalTC = SE.getSmallConstantTripCount(&L)) {
218-
APInt Step = ConstStep->getAPInt().abs().zextOrTrunc(64);
219-
APInt CanonicalTripCount(64, CanonicalTC);
220-
APInt TripCount = ConstTCV->getAPInt().zextOrTrunc(64);
221-
MatchTC = (CanonicalTripCount * Step - TripCount).ult(Step);
222-
}
223-
}
224-
// Otherwise, we simply check if the upper or lower bound expression of the
225-
// canonical IV contains TC.
226-
auto equalsTC = [&](const SCEV *S) -> bool { return S == TCV; };
227-
if (!MatchTC && !llvm::SCEVExprContains(CanonicalIVFinalV, equalsTC) &&
228-
!llvm::SCEVExprContains(CanonicalIVInitV, equalsTC))
207+
const SCEV *BTC = SE.getBackedgeTakenCount(&L);
208+
LLVM_DEBUG(dbgs() << "BTC: " << *BTC << "\n");
209+
if (isa<SCEVCouldNotCompute>(BTC))
229210
return false;
230211

231-
LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
212+
const SCEV *VFV = SE.getConstant(BTC->getType(), VF);
213+
VFV = SE.getMulExpr(VFV, SE.getVScale(VFV->getType()));
214+
const SCEV *ExitValV = SE.getMulExpr(BTC, VFV);
215+
LLVM_DEBUG(dbgs() << "ExitVal: " << *ExitValV << "\n");
232216

233217
// Create an EVL-based comparison and replace the branch to use it as
234218
// predicate.
235219
ICmpInst *OrigLatchCmp = L.getLatchCmpInst();
236-
ICmpInst::Predicate Pred = OrigLatchCmp->getPredicate();
237-
if (!ICmpInst::isEquality(Pred))
220+
const DataLayout &DL = L.getHeader()->getDataLayout();
221+
SCEVExpander Expander(SE, DL, "evl.iv.exitcondition");
222+
if (!Expander.isSafeToExpandAt(ExitValV, OrigLatchCmp))
238223
return false;
239224

225+
LLVM_DEBUG(dbgs() << "Using " << *EVLIndVar << " for EVL-based IndVar\n");
226+
227+
Value *ExitVal =
228+
Expander.expandCodeFor(ExitValV, EVLIndVar->getType(), OrigLatchCmp);
240229
IRBuilder<> Builder(OrigLatchCmp);
241-
auto *NewPred = Builder.CreateICmp(Pred, EVLIndVar, TC);
230+
auto *NewPred = Builder.CreateICmp(ICmpInst::ICMP_UGT, EVLIndVar, ExitVal);
242231
OrigLatchCmp->replaceAllUsesWith(NewPred);
243232

244233
// llvm::RecursivelyDeleteDeadPHINode only deletes cycles whose values are

llvm/test/CodeGen/RISCV/evl-iv-simplify.ll

Lines changed: 56 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify)' < %s | FileCheck %s
33
; RUN: opt -S -mtriple=riscv64 -mattr='+v' --passes='loop(evl-iv-simplify),function(simplifycfg,dce)' < %s | FileCheck %s --check-prefix=LOOP-DEL
44

5-
define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %N) {
5+
define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %N) vscale_range(2, 1024) {
66
; CHECK-LABEL: define void @simple(
77
; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], <vscale x 4 x i32> [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
88
; CHECK-NEXT: entry:
@@ -20,6 +20,16 @@ define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %
2020
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]]
2121
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
2222
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
23+
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
24+
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
25+
; CHECK-NEXT: [[TMP22:%.*]] = udiv i64 [[N_RND_UP]], [[TMP10]]
26+
; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i64 [[TMP22]], 2
27+
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 4, [[TMP23]]
28+
; CHECK-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], [[TMP9]]
29+
; CHECK-NEXT: [[TMP15:%.*]] = sub i64 0, [[TMP25]]
30+
; CHECK-NEXT: [[TMP16:%.*]] = udiv i64 [[TMP15]], [[TMP10]]
31+
; CHECK-NEXT: [[TMP26:%.*]] = mul i64 [[TMP16]], [[TMP9]]
32+
; CHECK-NEXT: [[TMP27:%.*]] = shl i64 [[TMP26]], 2
2333
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2434
; CHECK: vector.body:
2535
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -35,8 +45,8 @@ define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %
3545
; CHECK-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP18]], ptr align 4 [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
3646
; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64
3747
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
38-
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
39-
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
48+
; CHECK-NEXT: [[TMP28:%.*]] = icmp ugt i64 [[INDEX_EVL_NEXT]], [[TMP27]]
49+
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4050
; CHECK: middle.block:
4151
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
4252
; CHECK: scalar.ph:
@@ -65,6 +75,20 @@ define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %
6575
; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
6676
; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
6777
; LOOP-DEL: vector.ph:
78+
; LOOP-DEL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
79+
; LOOP-DEL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4
80+
; LOOP-DEL-NEXT: [[TMP19:%.*]] = sub i64 [[TMP18]], 1
81+
; LOOP-DEL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP19]]
82+
; LOOP-DEL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
83+
; LOOP-DEL-NEXT: [[TMP8:%.*]] = mul i64 [[TMP20]], 4
84+
; LOOP-DEL-NEXT: [[TMP9:%.*]] = udiv i64 [[N_RND_UP]], [[TMP8]]
85+
; LOOP-DEL-NEXT: [[TMP21:%.*]] = shl nuw nsw i64 [[TMP9]], 2
86+
; LOOP-DEL-NEXT: [[TMP22:%.*]] = sub i64 4, [[TMP21]]
87+
; LOOP-DEL-NEXT: [[TMP23:%.*]] = mul i64 [[TMP22]], [[TMP20]]
88+
; LOOP-DEL-NEXT: [[TMP24:%.*]] = sub i64 0, [[TMP23]]
89+
; LOOP-DEL-NEXT: [[TMP25:%.*]] = udiv i64 [[TMP24]], [[TMP8]]
90+
; LOOP-DEL-NEXT: [[TMP15:%.*]] = mul i64 [[TMP25]], [[TMP20]]
91+
; LOOP-DEL-NEXT: [[TMP16:%.*]] = shl i64 [[TMP15]], 2
6892
; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]]
6993
; LOOP-DEL: vector.body:
7094
; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -80,8 +104,8 @@ define void @simple(ptr noalias %a, ptr noalias %b, <vscale x 4 x i32> %c, i64 %
80104
; LOOP-DEL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP11]], ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
81105
; LOOP-DEL-NEXT: [[TMP14:%.*]] = zext i32 [[TMP5]] to i64
82106
; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
83-
; LOOP-DEL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
84-
; LOOP-DEL-NEXT: br i1 [[TMP15]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
107+
; LOOP-DEL-NEXT: [[TMP26:%.*]] = icmp ugt i64 [[INDEX_EVL_NEXT]], [[TMP16]]
108+
; LOOP-DEL-NEXT: br i1 [[TMP26]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
85109
; LOOP-DEL: for.body:
86110
; LOOP-DEL-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
87111
; LOOP-DEL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
@@ -129,7 +153,7 @@ vector.body: ; preds = %vector.body, %vecto
129153
call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %18, ptr align 4 %20, <vscale x 4 x i1> splat (i1 true), i32 %12)
130154
%21 = zext i32 %12 to i64
131155
%index.evl.next = add i64 %21, %evl.based.iv
132-
%index.next = add i64 %index, %10
156+
%index.next = add nuw i64 %index, %10
133157
%22 = icmp eq i64 %index.next, %n.vec
134158
br i1 %22, label %middle.block, label %vector.body, !llvm.loop !0
135159

@@ -162,8 +186,15 @@ define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
162186
; CHECK-NEXT: entry:
163187
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
164188
; CHECK: vector.ph:
189+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 15
190+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16
165191
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
166192
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
193+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[N_VEC]], -16
194+
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4
195+
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
196+
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP6]], [[TMP7]]
197+
; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 1
167198
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
168199
; CHECK: vector.body:
169200
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -173,8 +204,8 @@ define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
173204
; CHECK-NEXT: tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP1]])
174205
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
175206
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
176-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
177-
; CHECK-NEXT: br i1 [[TMP4]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
207+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[INDEX_EVL_NEXT]], [[TMP4]]
208+
; CHECK-NEXT: br i1 [[TMP8]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
178209
; CHECK: for.end.loopexit5:
179210
; CHECK-NEXT: br label [[FOR_END:%.*]]
180211
; CHECK: for.end:
@@ -183,8 +214,15 @@ define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
183214
; LOOP-DEL-LABEL: define void @fixed_iv_step(
184215
; LOOP-DEL-SAME: ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
185216
; LOOP-DEL-NEXT: entry:
217+
; LOOP-DEL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], 15
218+
; LOOP-DEL-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16
186219
; LOOP-DEL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
187220
; LOOP-DEL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
221+
; LOOP-DEL-NEXT: [[TMP5:%.*]] = add i64 [[N_VEC]], -16
222+
; LOOP-DEL-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4
223+
; LOOP-DEL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
224+
; LOOP-DEL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP6]], [[TMP7]]
225+
; LOOP-DEL-NEXT: [[TMP4:%.*]] = shl i64 [[TMP3]], 1
188226
; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]]
189227
; LOOP-DEL: vector.body:
190228
; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -194,8 +232,8 @@ define void @fixed_iv_step(ptr %arg0, ptr %arg1, i64 %N) #0 {
194232
; LOOP-DEL-NEXT: tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP1]])
195233
; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
196234
; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
197-
; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
198-
; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
235+
; LOOP-DEL-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[INDEX_EVL_NEXT]], [[TMP4]]
236+
; LOOP-DEL-NEXT: br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
199237
; LOOP-DEL: for.end:
200238
; LOOP-DEL-NEXT: ret void
201239
;
@@ -238,6 +276,8 @@ define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
238276
; CHECK: vector.ph:
239277
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
240278
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
279+
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
280+
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 10
241281
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
242282
; CHECK: vector.body:
243283
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -247,8 +287,8 @@ define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
247287
; CHECK-NEXT: tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP1]])
248288
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
249289
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
250-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87
251-
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
290+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[INDEX_EVL_NEXT]], [[TMP4]]
291+
; CHECK-NEXT: br i1 [[TMP5]], label [[FOR_END_LOOPEXIT5:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
252292
; CHECK: for.end.loopexit5:
253293
; CHECK-NEXT: br label [[FOR_END:%.*]]
254294
; CHECK: for.end:
@@ -259,6 +299,8 @@ define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
259299
; LOOP-DEL-NEXT: entry:
260300
; LOOP-DEL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[ARG0]], i64 0
261301
; LOOP-DEL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
302+
; LOOP-DEL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
303+
; LOOP-DEL-NEXT: [[TMP4:%.*]] = mul nuw nsw i64 [[TMP3]], 10
262304
; LOOP-DEL-NEXT: br label [[VECTOR_BODY:%.*]]
263305
; LOOP-DEL: vector.body:
264306
; LOOP-DEL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -268,8 +310,8 @@ define void @fixed_iv_step_tc(ptr %arg0, ptr %arg1) #0 {
268310
; LOOP-DEL-NEXT: tail call void @llvm.vp.store.nxv2p0.p0(<vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr align 8 [[GEP]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP1]])
269311
; LOOP-DEL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
270312
; LOOP-DEL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[EVL_BASED_IV]], [[TMP2]]
271-
; LOOP-DEL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 87
272-
; LOOP-DEL-NEXT: br i1 [[TMP3]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
313+
; LOOP-DEL-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[INDEX_EVL_NEXT]], [[TMP4]]
314+
; LOOP-DEL-NEXT: br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3]]
273315
; LOOP-DEL: for.end:
274316
; LOOP-DEL-NEXT: ret void
275317
;

0 commit comments

Comments
 (0)