Skip to content

Commit bdc69c8

Browse files
committed
[LV] Use ICMP_UGE for BranchOnCount when VF is scalable
So that SCEV can analyse the bound of loop count. This can fix issue found in #100564.
1 parent 2b077ed commit bdc69c8

File tree

70 files changed

+2939
-2797
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+2939
-2797
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -524,7 +524,10 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
524524
// First create the compare.
525525
Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
526526
Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
527-
Value *Cond = Builder.CreateICmpEQ(IV, TC);
527+
// Use ICMP_UGE so that SCEV can analyse the bound of loop count for
528+
// scalable VF.
529+
Value *Cond = Builder.CreateICmp(
530+
State.VF.isScalable() ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_EQ, IV, TC);
528531

529532
// Now create the branch.
530533
auto *Plan = getParent()->getPlan();

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -709,7 +709,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
709709
; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
710710
; DEFAULT-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP17]], i32 4, <vscale x 4 x i1> [[TMP15]]), !alias.scope [[META20:![0-9]+]], !noalias [[META21:![0-9]+]]
711711
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
712-
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
712+
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
713713
; DEFAULT-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
714714
; DEFAULT: middle.block:
715715
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
6060
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP36]], align 8
6161
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
6262
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
63-
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
63+
; CHECK-NEXT: [[TMP40:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
6464
; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6565
; CHECK: [[MIDDLE_BLOCK]]:
6666
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ define void @f1(ptr %A) #0 {
2828
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
2929
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), ptr [[TMP8]], align 4
3030
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
31-
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
31+
; CHECK-NEXT: [[TMP9:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
3232
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3333
; CHECK: middle.block:
3434
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]

llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,32 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
3838
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3939
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4040
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
41+
; CHECK: middle.block:
42+
; CHECK-NEXT: [[TMP16:%.*]] = call double @llvm.vector.reduce.fadd.v2f64(double -0.000000e+00, <2 x double> [[TMP14]])
43+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
44+
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
45+
; CHECK: scalar.ph:
46+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
47+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
48+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
49+
; CHECK: for.cond.cleanup.loopexit:
50+
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
51+
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
52+
; CHECK: for.cond.cleanup:
53+
; CHECK-NEXT: [[RES_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
54+
; CHECK-NEXT: ret double [[RES_0_LCSSA]]
55+
; CHECK: for.body:
56+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
57+
; CHECK-NEXT: [[RES_07:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
58+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[INDVARS_IV]]
59+
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
60+
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP17]] to i64
61+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[IDXPROM1]]
62+
; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
63+
; CHECK-NEXT: [[ADD]] = fadd double [[RES_07]], [[TMP18]]
64+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
65+
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
66+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
4167
;
4268
; SVE-LABEL: @test(
4369
; SVE-NEXT: entry:
@@ -54,23 +80,49 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur
5480
; SVE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
5581
; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]]
5682
; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
57-
; SVE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
58-
; SVE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
83+
; SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
84+
; SVE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
5985
; SVE-NEXT: br label [[VECTOR_BODY:%.*]]
6086
; SVE: vector.body:
6187
; SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
62-
; SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
63-
; SVE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
64-
; SVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP4]]
65-
; SVE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
66-
; SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP6]], align 4
67-
; SVE-NEXT: [[TMP7:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64>
68-
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP7]]
69-
; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP8]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
70-
; SVE-NEXT: [[TMP9]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
71-
; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
72-
; SVE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
88+
; SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x double> [ insertelement (<vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), double 0.000000e+00, i32 0), [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
89+
; SVE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
90+
; SVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[OFFSET:%.*]], i64 [[TMP6]]
91+
; SVE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
92+
; SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP8]], align 4
93+
; SVE-NEXT: [[TMP9:%.*]] = sext <vscale x 2 x i32> [[WIDE_LOAD]] to <vscale x 2 x i64>
94+
; SVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[DATA:%.*]], <vscale x 2 x i64> [[TMP9]]
95+
; SVE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> [[TMP10]], i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> poison)
96+
; SVE-NEXT: [[TMP11]] = fadd <vscale x 2 x double> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
97+
; SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
98+
; SVE-NEXT: [[TMP12:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
7399
; SVE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
100+
; SVE: middle.block:
101+
; SVE-NEXT: [[TMP13:%.*]] = call double @llvm.vector.reduce.fadd.nxv2f64(double -0.000000e+00, <vscale x 2 x double> [[TMP11]])
102+
; SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
103+
; SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
104+
; SVE: scalar.ph:
105+
; SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
106+
; SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ]
107+
; SVE-NEXT: br label [[FOR_BODY:%.*]]
108+
; SVE: for.cond.cleanup.loopexit:
109+
; SVE-NEXT: [[ADD_LCSSA:%.*]] = phi double [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
110+
; SVE-NEXT: br label [[FOR_COND_CLEANUP]]
111+
; SVE: for.cond.cleanup:
112+
; SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
113+
; SVE-NEXT: ret double [[RES_0_LCSSA]]
114+
; SVE: for.body:
115+
; SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
116+
; SVE-NEXT: [[RES_07:%.*]] = phi double [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
117+
; SVE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[OFFSET]], i64 [[INDVARS_IV]]
118+
; SVE-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
119+
; SVE-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP14]] to i64
120+
; SVE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[DATA]], i64 [[IDXPROM1]]
121+
; SVE-NEXT: [[TMP15:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
122+
; SVE-NEXT: [[ADD]] = fadd double [[RES_07]], [[TMP15]]
123+
; SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
124+
; SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
125+
; SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
74126
;
75127
entry:
76128
%cmp6 = icmp sgt i32 %size, 0

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
7676
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP36]], ptr [[TMP40]], align 1
7777
; DEFAULT-NEXT: store <vscale x 8 x i8> [[TMP37]], ptr [[TMP43]], align 1
7878
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
79-
; DEFAULT-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
79+
; DEFAULT-NEXT: [[TMP44:%.*]] = icmp uge i64 [[INDEX_NEXT]], [[N_VEC]]
8080
; DEFAULT-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
8181
; DEFAULT: middle.block:
8282
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
@@ -115,7 +115,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
115115
; DEFAULT-NEXT: [[TMP62:%.*]] = getelementptr i8, ptr [[TMP61]], i32 0
116116
; DEFAULT-NEXT: store <vscale x 4 x i8> [[TMP60]], ptr [[TMP62]], align 1
117117
; DEFAULT-NEXT: [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], [[TMP50]]
118-
; DEFAULT-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC6]]
118+
; DEFAULT-NEXT: [[TMP63:%.*]] = icmp uge i64 [[INDEX_NEXT12]], [[N_VEC6]]
119119
; DEFAULT-NEXT: br i1 [[TMP63]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
120120
; DEFAULT: vec.epilog.middle.block:
121121
; DEFAULT-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC6]]

0 commit comments

Comments
 (0)