Skip to content

Commit 587674a

Browse files
committed
[LV] Use ICMP_UGE for BranchOnCount when VF is scalable
So that SCEV can analyse the bound of loop count. This can fix issue found in #100564.
1 parent 164d123 commit 587674a

File tree

71 files changed

+1758
-1616
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+1758
-1616
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,10 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
524524
// First create the compare.
525525
Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
526526
Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
527-
Value *Cond = Builder.CreateICmpEQ(IV, TC);
527+
// Use ICMP_UGE so that SCEV can analyse the bound of loop count for
528+
// scalable VF.
529+
Value *Cond = Builder.CreateICmp(
530+
State.VF.isScalable() ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_EQ, IV, TC);
528531

529532
// Now create the branch.
530533
auto *Plan = getParent()->getPlan();

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
709709
; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
710710
; DEFAULT-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP17]], i32 4, <vscale x 4 x i1> [[TMP15]]), !alias.scope [[META20:![0-9]+]], !noalias [[META21:![0-9]+]]
711711
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
712-
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
712+
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
713713
; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
714714
; DEFAULT: middle.block:
715715
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
6060
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP36]], align 8
6161
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
6262
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
63-
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
63+
; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
6464
; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6565
; CHECK: [[MIDDLE_BLOCK]]:
6666
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ define void @f1(ptr %A) #0 {
2828
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
2929
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP8]], align 4
3030
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
31-
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
31+
; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
3232
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3333
; CHECK: middle.block:
3434
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,32 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
3838
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3939
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4040
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
41+
; CHECK: middle.block:
42+
; CHECK-NEXT: [[TMP16:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP14]])
43+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
44+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
45+
; CHECK: scalar.ph:
46+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
47+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
48+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
49+
; CHECK: for.cond.cleanup.loopexit:
50+
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
51+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
52+
; CHECK: for.cond.cleanup:
53+
; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
54+
; CHECK-NEXT: ret double [[RES_0_LCSSA]]
55+
; CHECK: for.body:
56+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
57+
; CHECK-NEXT: [[RES_07:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
58+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[INDVARS_IV]]
59+
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
60+
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP17]] to i64
61+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[IDXPROM1]]
62+
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
63+
; CHECK-NEXT: [[ADD]] = fadd double [[RES_07]], [[TMP18]]
64+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
65+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
66+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
4167
;
4268
; SVE-LABEL: @test(
4369
; SVE-NEXT: entry:
@@ -54,23 +80,49 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
5480
; SVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
5581
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
5682
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
57-
; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
58-
; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
83+
; SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
84+
; SVE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
5985
; SVE-NEXT: br label [[VECTOR_BODY:%.*]]
6086
; SVE: vector.body:
6187
; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
62-
; SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
63-
; SVE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
64-
; SVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP4]]
65-
; SVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
66-
; SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP6]], align 4
67-
; SVE-NEXT: [[TMP7:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64>
68-
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]]
69-
; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
70-
; SVE-NEXT: [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
71-
; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
72-
; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
88+
; SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
89+
; SVE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
90+
; SVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP6]]
91+
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
92+
; SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP8]], align 4
93+
; SVE-NEXT: [[TMP9:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64>
94+
; SVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP9]]
95+
; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP10]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
96+
; SVE-NEXT: [[TMP11]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
97+
; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
98+
; SVE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
7399
; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
100+
; SVE: middle.block:
101+
; SVE-NEXT: [[TMP13:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> [[TMP11]])
102+
; SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
103+
; SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
104+
; SVE: scalar.ph:
105+
; SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
106+
; SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
107+
; SVE-NEXT: br label [[FOR_BODY:%.*]]
108+
; SVE: for.cond.cleanup.loopexit:
109+
; SVE-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
110+
; SVE-NEXT: br label [[FOR_COND_CLEANUP]]
111+
; SVE: for.cond.cleanup:
112+
; SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
113+
; SVE-NEXT: ret double [[RES_0_LCSSA]]
114+
; SVE: for.body:
115+
; SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
116+
; SVE-NEXT: [[RES_07:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
117+
; SVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[INDVARS_IV]]
118+
; SVE-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
119+
; SVE-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP14]] to i64
120+
; SVE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[IDXPROM1]]
121+
; SVE-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
122+
; SVE-NEXT: [[ADD]] = fadd double [[RES_07]], [[TMP15]]
123+
; SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
124+
; SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
125+
; SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
74126
;
75127
entry:
76128
%cmp6 = icmp sgt i32 %size, 0

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
7676
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP36]], ptr [[TMP40]], align 1
7777
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP37]], ptr [[TMP43]], align 1
7878
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
79-
; DEFAULT-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
79+
; DEFAULT-NEXT: [[TMP44:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
8080
; DEFAULT-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8181
; DEFAULT: middle.block:
8282
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
@@ -115,7 +115,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
115115
; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i32 0
116116
; DEFAULT-NEXT: store <vscale x 4 x i8> [[TMP60]], ptr [[TMP62]], align 1
117117
; DEFAULT-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP50]]
118-
; DEFAULT-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC6]]
118+
; DEFAULT-NEXT: [[TMP63:%.*]] = icmp uge i64 [[INDEX_NEXT12]], [[N_VEC6]]
119119
; DEFAULT-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
120120
; DEFAULT: vec.epilog.middle.block:
121121
; DEFAULT-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]

llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 {
3131
; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
3232
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
3333
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
34-
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
34+
; TFNONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
3535
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3636
; TFNONE: middle.block:
3737
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -169,7 +169,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 {
169169
; TFNONE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
170170
; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP10]], align 8
171171
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
172-
; TFNONE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
172+
; TFNONE-NEXT: [[TMP11:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
173173
; TFNONE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
174174
; TFNONE: middle.block:
175175
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -340,7 +340,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 {
340340
; TFNONE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDEX]]
341341
; TFNONE-NEXT: store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP11]], align 8
342342
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
343-
; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
343+
; TFNONE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
344344
; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
345345
; TFNONE: middle.block:
346346
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -516,7 +516,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
516516
; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
517517
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
518518
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
519-
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
519+
; TFNONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
520520
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
521521
; TFNONE: middle.block:
522522
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -574,7 +574,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 {
574574
; TFFALLBACK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
575575
; TFFALLBACK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
576576
; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
577-
; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
577+
; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
578578
; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
579579
; TFFALLBACK: scalar.ph:
580580
; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[VECTOR_BODY]] ]
@@ -652,7 +652,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 {
652652
; TFNONE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
653653
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
654654
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
655-
; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
655+
; TFNONE-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
656656
; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
657657
; TFNONE: middle.block:
658658
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -823,7 +823,7 @@ define double @test_widen_fmuladd_and_call(ptr noalias %a, ptr readnone %b, doub
823823
; TFNONE-NEXT: store <vscale x 2 x i64> [[TMP9]], ptr [[TMP10]], align 8
824824
; TFNONE-NEXT: [[TMP11]] = call double @llvm.vector.reduce.fadd.nxv2f64(double [[VEC_PHI]], <vscale x 2 x double> [[TMP7]])
825825
; TFNONE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
826-
; TFNONE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
826+
; TFNONE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
827827
; TFNONE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
828828
; TFNONE: middle.block:
829829
; TFNONE-NEXT: br i1 false, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]

0 commit comments

Comments
 (0)