Skip to content

Commit d528e52

Browse files
[LLVM][SCEV] Look through common multiplicand when simplifying compares.
My usecase is simplifying the control flow generated by LoopVectorize when vectorising loops whose tripcount is a function of the runtime vector length. This can be problematic because: * CSE is a pre-LoopVectorize transform and so it's common for an IR function to include several calls to llvm.vscale(). (NOTE: Code generation will typically remove the duplicates) * Pre-LoopVectorize instcombines will rewrite some multiplies as shifts. This leads to a mismatch between VL based maths of the scalar loop and that created for the vector loop, which prevents some obvious simplifications. SCEV does not suffer these issues because it effectively does CSE during construction and shifts are represented as multiplies.
1 parent 9aebf4c commit d528e52

File tree

2 files changed

+38
-82
lines changed

2 files changed

+38
-82
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10748,6 +10748,22 @@ bool ScalarEvolution::SimplifyICmpOperands(CmpPredicate &Pred, const SCEV *&LHS,
1074810748
if (Depth >= 3)
1074910749
return false;
1075010750

10751+
// (X * Z) icmp (Y * Z) ==> X icmp Y
10752+
// when neither multiply wraps and Z is positive.
10753+
if (isa<SCEVMulExpr>(LHS) && isa<SCEVMulExpr>(RHS)) {
10754+
const SCEVMulExpr *LMul = cast<SCEVMulExpr>(LHS);
10755+
const SCEVMulExpr *RMul = cast<SCEVMulExpr>(RHS);
10756+
10757+
if (LMul->getNumOperands() == 2 && RMul->getNumOperands() == 2 &&
10758+
LMul->getOperand(1) == RMul->getOperand(1) &&
10759+
isKnownPositive(LMul->getOperand(1)) && ICmpInst::isUnsigned(Pred) &&
10760+
LMul->hasNoUnsignedWrap() && RMul->hasNoUnsignedWrap()) {
10761+
LHS = LMul->getOperand(0);
10762+
RHS = RMul->getOperand(0);
10763+
Changed = true;
10764+
}
10765+
}
10766+
1075110767
// Canonicalize a constant to the right side.
1075210768
if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
1075310769
// Check for both operands constant.

llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll

Lines changed: 22 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -9,54 +9,13 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
99
; CHECK-NEXT: [[ENTRY:.*]]:
1010
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
1111
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
12-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
13-
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8
14-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]]
15-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16-
; CHECK: [[VECTOR_PH]]:
17-
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
18-
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
19-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]]
20-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
2112
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
2213
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP10]], 8
23-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
24-
; CHECK: [[VECTOR_BODY]]:
25-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
27-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i32 0
28-
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
29-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP18]], 4
30-
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP11]]
31-
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, ptr [[TMP14]], align 4
32-
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP26]], align 4
33-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
34-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i32 0
35-
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
36-
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
37-
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP16]]
38-
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
39-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float>, ptr [[TMP27]], align 4
40-
; CHECK-NEXT: [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD3]]
41-
; CHECK-NEXT: [[TMP28:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD4]]
42-
; CHECK-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
43-
; CHECK-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4
44-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP21]]
45-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
46-
; CHECK-NEXT: store <vscale x 4 x float> [[TMP28]], ptr [[TMP22]], align 4
47-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
48-
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
49-
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
50-
; CHECK: [[MIDDLE_BLOCK]]:
51-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
52-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
53-
; CHECK: [[SCALAR_PH]]:
54-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
5514
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
56-
; CHECK: [[FOR_COND_CLEANUP]]:
15+
; CHECK: [[FOR_COND_CLEANUP:.*]]:
5716
; CHECK-NEXT: ret void
5817
; CHECK: [[FOR_BODY]]:
59-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
18+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
6019
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
6120
; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX]], align 4
6221
; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -65,7 +24,7 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
6524
; CHECK-NEXT: store float [[MUL4]], ptr [[ARRAYIDX3]], align 4
6625
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
6726
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP1]]
68-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
27+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
6928
;
7029
entry:
7130
%0 = tail call i64 @llvm.vscale.i64()
@@ -136,7 +95,7 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
13695
; CHECK-NEXT: store float [[MUL5]], ptr [[ARRAYIDX4]], align 4
13796
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
13897
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[MUL1]]
139-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
98+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
14099
;
141100
entry:
142101
%0 = tail call i64 @llvm.vscale.i64()
@@ -167,9 +126,6 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
167126
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
168127
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
169128
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
170-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
171-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
172-
; CHECK: [[VECTOR_PH]]:
173129
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
174130
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
175131
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -178,7 +134,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
178134
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
179135
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
180136
; CHECK: [[VECTOR_BODY]]:
181-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
137+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
182138
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
183139
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 0
184140
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -202,17 +158,14 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
202158
; CHECK-NEXT: store <vscale x 4 x float> [[TMP25]], ptr [[TMP21]], align 4
203159
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
204160
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
205-
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
161+
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
206162
; CHECK: [[MIDDLE_BLOCK]]:
207163
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
208-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
209-
; CHECK: [[SCALAR_PH]]:
210-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
211-
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
164+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
212165
; CHECK: [[FOR_COND_CLEANUP]]:
213166
; CHECK-NEXT: ret void
214167
; CHECK: [[FOR_BODY]]:
215-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
168+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
216169
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
217170
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
218171
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -221,7 +174,7 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
221174
; CHECK-NEXT: store float [[MUL5]], ptr [[ARRAYIDX4]], align 4
222175
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
223176
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[MUL1]]
224-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
177+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
225178
;
226179
entry:
227180
%0 = tail call i64 @llvm.vscale.i64()
@@ -252,9 +205,6 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
252205
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
253206
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
254207
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
255-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
256-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
257-
; CHECK: [[VECTOR_PH]]:
258208
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
259209
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
260210
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -263,7 +213,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
263213
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
264214
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
265215
; CHECK: [[VECTOR_BODY]]:
266-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
216+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
267217
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
268218
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 0
269219
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -287,17 +237,14 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
287237
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
288238
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
289239
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
290-
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
240+
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
291241
; CHECK: [[MIDDLE_BLOCK]]:
292242
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
293-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
294-
; CHECK: [[SCALAR_PH]]:
295-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
296-
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
243+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
297244
; CHECK: [[FOR_COND_CLEANUP]]:
298245
; CHECK-NEXT: ret void
299246
; CHECK: [[FOR_BODY]]:
300-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
247+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
301248
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
302249
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
303250
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -306,7 +253,7 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
306253
; CHECK-NEXT: store float [[MUL5]], ptr [[ARRAYIDX4]], align 4
307254
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
308255
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[MUL1]]
309-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
256+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
310257
;
311258
entry:
312259
%0 = tail call i64 @llvm.vscale.i64()
@@ -337,9 +284,6 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
337284
; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
338285
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
339286
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 8
340-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
341-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
342-
; CHECK: [[VECTOR_PH]]:
343287
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
344288
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
345289
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
@@ -348,7 +292,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
348292
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
349293
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
350294
; CHECK: [[VECTOR_BODY]]:
351-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
295+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
352296
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
353297
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP7]], i32 0
354298
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
@@ -372,17 +316,14 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
372316
; CHECK-NEXT: store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
373317
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
374318
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
375-
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
319+
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
376320
; CHECK: [[MIDDLE_BLOCK]]:
377321
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
378-
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
379-
; CHECK: [[SCALAR_PH]]:
380-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
381-
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
322+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
382323
; CHECK: [[FOR_COND_CLEANUP]]:
383324
; CHECK-NEXT: ret void
384325
; CHECK: [[FOR_BODY]]:
385-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
326+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
386327
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
387328
; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
388329
; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -391,7 +332,7 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
391332
; CHECK-NEXT: store float [[MUL5]], ptr [[ARRAYIDX4]], align 4
392333
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
393334
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[MUL1]]
394-
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
335+
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
395336
;
396337
entry:
397338
%0 = tail call i64 @llvm.vscale.i64()
@@ -419,14 +360,13 @@ declare i64 @llvm.vscale.i64()
419360
attributes #0 = { vscale_range(1,16) "target-features"="+sve" }
420361
;.
421362
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
422-
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
423-
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
424-
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
363+
; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
364+
; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
365+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
425366
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
426367
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
427368
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
428369
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
429370
; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
430371
; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
431-
; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
432372
;.

0 commit comments

Comments
 (0)